1
2 /*--------------------------------------------------------------------*/
3 /*--- begin guest_x86_toIR.c ---*/
4 /*--------------------------------------------------------------------*/
5
6 /*
7 This file is part of Valgrind, a dynamic binary instrumentation
8 framework.
9
10 Copyright (C) 2004-2013 OpenWorks LLP
11 info@open-works.net
12
13 This program is free software; you can redistribute it and/or
14 modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2 of the
16 License, or (at your option) any later version.
17
18 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
22
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, write to the Free Software
25 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
26 02110-1301, USA.
27
28 The GNU General Public License is contained in the file COPYING.
29
30 Neither the names of the U.S. Department of Energy nor the
31 University of California nor the names of its contributors may be
32 used to endorse or promote products derived from this software
33 without prior written permission.
34 */
35
36 /* Translates x86 code to IR. */
37
38 /* TODO:
39
40 All Puts to CC_OP/CC_DEP1/CC_DEP2/CC_NDEP should really be checked
41 to ensure a 32-bit value is being written.
42
43 FUCOMI(P): what happens to A and S flags? Currently are forced
44 to zero.
45
46 x87 FP Limitations:
47
48 * all arithmetic done at 64 bits
49
50 * no FP exceptions, except for handling stack over/underflow
51
52 * FP rounding mode observed only for float->int conversions
53 and int->float conversions which could lose accuracy, and
54 for float-to-float rounding. For all other operations,
55 round-to-nearest is used, regardless.
56
57 * some of the FCOM cases could do with testing -- not convinced
58 that the args are the right way round.
59
60 * FSAVE does not re-initialise the FPU; it should do
61
62 * FINIT not only initialises the FPU environment, it also
63 zeroes all the FP registers. It should leave the registers
64 unchanged.
65
66 SAHF should cause eflags[1] == 1, and in fact it produces 0. As
67 per Intel docs this bit has no meaning anyway. Since PUSHF is the
68 only way to observe eflags[1], a proper fix would be to make that
69 bit be set by PUSHF.
70
71 The state of %eflags.AC (alignment check, bit 18) is recorded by
72 the simulation (viz, if you set it with popf then a pushf produces
73 the value you set it to), but it is otherwise ignored. In
74 particular, setting it to 1 does NOT cause alignment checking to
75 happen. Programs that set it to 1 and then rely on the resulting
76 SIGBUSs to inform them of misaligned accesses will not work.
77
78 Implementation of sysenter is necessarily partial. sysenter is a
79 kind of system call entry. When doing a sysenter, the return
80 address is not known -- that is something that is beyond Vex's
81 knowledge. So the generated IR forces a return to the scheduler,
   which can do what it likes to simulate the sysenter, but it MUST
83 set this thread's guest_EIP field with the continuation address
84 before resuming execution. If that doesn't happen, the thread will
85 jump to address zero, which is probably fatal.
86
87 This module uses global variables and so is not MT-safe (if that
88 should ever become relevant).
89
90 The delta values are 32-bit ints, not 64-bit ints. That means
91 this module may not work right if run on a 64-bit host. That should
92 be fixed properly, really -- if anyone ever wants to use Vex to
93 translate x86 code for execution on a 64-bit host.
94
95 casLE (implementation of lock-prefixed insns) and rep-prefixed
96 insns: the side-exit back to the start of the insn is done with
97 Ijk_Boring. This is quite wrong, it should be done with
98 Ijk_NoRedir, since otherwise the side exit, which is intended to
99 restart the instruction for whatever reason, could go somewhere
100 entirely else. Doing it right (with Ijk_NoRedir jumps) would make
101 no-redir jumps performance critical, at least for rep-prefixed
102 instructions, since all iterations thereof would involve such a
103 jump. It's not such a big deal with casLE since the side exit is
104 only taken if the CAS fails, that is, the location is contended,
105 which is relatively unlikely.
106
107 XXXX: Nov 2009: handling of SWP on ARM suffers from the same
108 problem.
109
110 Note also, the test for CAS success vs failure is done using
111 Iop_CasCmp{EQ,NE}{8,16,32,64} rather than the ordinary
112 Iop_Cmp{EQ,NE} equivalents. This is so as to tell Memcheck that it
113 shouldn't definedness-check these comparisons. See
114 COMMENT_ON_CasCmpEQ in memcheck/mc_translate.c for
115 background/rationale.
116 */
117
118 /* Performance holes:
119
120 - fcom ; fstsw %ax ; sahf
121 sahf does not update the O flag (sigh) and so O needs to
122 be computed. This is done expensively; it would be better
123 to have a calculate_eflags_o helper.
124
125 - emwarns; some FP codes can generate huge numbers of these
126 if the fpucw is changed in an inner loop. It would be
127 better for the guest state to have an emwarn-enable reg
128 which can be set zero or nonzero. If it is zero, emwarns
129 are not flagged, and instead control just flows all the
130 way through bbs as usual.
131 */
132
133 /* "Special" instructions.
134
135 This instruction decoder can decode three special instructions
136 which mean nothing natively (are no-ops as far as regs/mem are
137 concerned) but have meaning for supporting Valgrind. A special
138 instruction is flagged by the 12-byte preamble C1C703 C1C70D C1C71D
139 C1C713 (in the standard interpretation, that means: roll $3, %edi;
140 roll $13, %edi; roll $29, %edi; roll $19, %edi). Following that,
141 one of the following 3 are allowed (standard interpretation in
142 parentheses):
143
144 87DB (xchgl %ebx,%ebx) %EDX = client_request ( %EAX )
145 87C9 (xchgl %ecx,%ecx) %EAX = guest_NRADDR
146 87D2 (xchgl %edx,%edx) call-noredir *%EAX
147 87FF (xchgl %edi,%edi) IR injection
148
149 Any other bytes following the 12-byte preamble are illegal and
150 constitute a failure in instruction decoding. This all assumes
151 that the preamble will never occur except in specific code
152 fragments designed for Valgrind to catch.
153
154 No prefixes may precede a "Special" instruction.
155 */
156
157 /* LOCK prefixed instructions. These are translated using IR-level
158 CAS statements (IRCAS) and are believed to preserve atomicity, even
159 from the point of view of some other process racing against a
160 simulated one (presumably they communicate via a shared memory
161 segment).
162
163 Handlers which are aware of LOCK prefixes are:
164 dis_op2_G_E (add, or, adc, sbb, and, sub, xor)
165 dis_cmpxchg_G_E (cmpxchg)
166 dis_Grp1 (add, or, adc, sbb, and, sub, xor)
167 dis_Grp3 (not, neg)
168 dis_Grp4 (inc, dec)
169 dis_Grp5 (inc, dec)
170 dis_Grp8_Imm (bts, btc, btr)
171 dis_bt_G_E (bts, btc, btr)
172 dis_xadd_G_E (xadd)
173 */
174
175
176 #include "libvex_basictypes.h"
177 #include "libvex_ir.h"
178 #include "libvex.h"
179 #include "libvex_guest_x86.h"
180
181 #include "main_util.h"
182 #include "main_globals.h"
183 #include "guest_generic_bb_to_IR.h"
184 #include "guest_generic_x87.h"
185 #include "guest_x86_defs.h"
186
187
188 /*------------------------------------------------------------*/
189 /*--- Globals ---*/
190 /*------------------------------------------------------------*/
191
192 /* These are set at the start of the translation of an insn, right
193 down in disInstr_X86, so that we don't have to pass them around
194 endlessly. They are all constant during the translation of any
195 given insn. */
196
197 /* We need to know this to do sub-register accesses correctly. */
198 static Bool host_is_bigendian;
199
200 /* Pointer to the guest code area (points to start of BB, not to the
201 insn being processed). */
202 static UChar* guest_code;
203
204 /* The guest address corresponding to guest_code[0]. */
205 static Addr32 guest_EIP_bbstart;
206
207 /* The guest address for the instruction currently being
208 translated. */
209 static Addr32 guest_EIP_curr_instr;
210
211 /* The IRSB* into which we're generating code. */
212 static IRSB* irsb;
213
214
215 /*------------------------------------------------------------*/
216 /*--- Debugging output ---*/
217 /*------------------------------------------------------------*/
218
219 #define DIP(format, args...) \
220 if (vex_traceflags & VEX_TRACE_FE) \
221 vex_printf(format, ## args)
222
223 #define DIS(buf, format, args...) \
224 if (vex_traceflags & VEX_TRACE_FE) \
225 vex_sprintf(buf, format, ## args)
226
227
228 /*------------------------------------------------------------*/
229 /*--- Offsets of various parts of the x86 guest state. ---*/
230 /*------------------------------------------------------------*/
231
232 #define OFFB_EAX offsetof(VexGuestX86State,guest_EAX)
233 #define OFFB_EBX offsetof(VexGuestX86State,guest_EBX)
234 #define OFFB_ECX offsetof(VexGuestX86State,guest_ECX)
235 #define OFFB_EDX offsetof(VexGuestX86State,guest_EDX)
236 #define OFFB_ESP offsetof(VexGuestX86State,guest_ESP)
237 #define OFFB_EBP offsetof(VexGuestX86State,guest_EBP)
238 #define OFFB_ESI offsetof(VexGuestX86State,guest_ESI)
239 #define OFFB_EDI offsetof(VexGuestX86State,guest_EDI)
240
241 #define OFFB_EIP offsetof(VexGuestX86State,guest_EIP)
242
243 #define OFFB_CC_OP offsetof(VexGuestX86State,guest_CC_OP)
244 #define OFFB_CC_DEP1 offsetof(VexGuestX86State,guest_CC_DEP1)
245 #define OFFB_CC_DEP2 offsetof(VexGuestX86State,guest_CC_DEP2)
246 #define OFFB_CC_NDEP offsetof(VexGuestX86State,guest_CC_NDEP)
247
248 #define OFFB_FPREGS offsetof(VexGuestX86State,guest_FPREG[0])
249 #define OFFB_FPTAGS offsetof(VexGuestX86State,guest_FPTAG[0])
250 #define OFFB_DFLAG offsetof(VexGuestX86State,guest_DFLAG)
251 #define OFFB_IDFLAG offsetof(VexGuestX86State,guest_IDFLAG)
252 #define OFFB_ACFLAG offsetof(VexGuestX86State,guest_ACFLAG)
253 #define OFFB_FTOP offsetof(VexGuestX86State,guest_FTOP)
254 #define OFFB_FC3210 offsetof(VexGuestX86State,guest_FC3210)
255 #define OFFB_FPROUND offsetof(VexGuestX86State,guest_FPROUND)
256
257 #define OFFB_CS offsetof(VexGuestX86State,guest_CS)
258 #define OFFB_DS offsetof(VexGuestX86State,guest_DS)
259 #define OFFB_ES offsetof(VexGuestX86State,guest_ES)
260 #define OFFB_FS offsetof(VexGuestX86State,guest_FS)
261 #define OFFB_GS offsetof(VexGuestX86State,guest_GS)
262 #define OFFB_SS offsetof(VexGuestX86State,guest_SS)
263 #define OFFB_LDT offsetof(VexGuestX86State,guest_LDT)
264 #define OFFB_GDT offsetof(VexGuestX86State,guest_GDT)
265
266 #define OFFB_SSEROUND offsetof(VexGuestX86State,guest_SSEROUND)
267 #define OFFB_XMM0 offsetof(VexGuestX86State,guest_XMM0)
268 #define OFFB_XMM1 offsetof(VexGuestX86State,guest_XMM1)
269 #define OFFB_XMM2 offsetof(VexGuestX86State,guest_XMM2)
270 #define OFFB_XMM3 offsetof(VexGuestX86State,guest_XMM3)
271 #define OFFB_XMM4 offsetof(VexGuestX86State,guest_XMM4)
272 #define OFFB_XMM5 offsetof(VexGuestX86State,guest_XMM5)
273 #define OFFB_XMM6 offsetof(VexGuestX86State,guest_XMM6)
274 #define OFFB_XMM7 offsetof(VexGuestX86State,guest_XMM7)
275
276 #define OFFB_EMNOTE offsetof(VexGuestX86State,guest_EMNOTE)
277
278 #define OFFB_CMSTART offsetof(VexGuestX86State,guest_CMSTART)
279 #define OFFB_CMLEN offsetof(VexGuestX86State,guest_CMLEN)
280 #define OFFB_NRADDR offsetof(VexGuestX86State,guest_NRADDR)
281
282 #define OFFB_IP_AT_SYSCALL offsetof(VexGuestX86State,guest_IP_AT_SYSCALL)
283
284
285 /*------------------------------------------------------------*/
286 /*--- Helper bits and pieces for deconstructing the ---*/
287 /*--- x86 insn stream. ---*/
288 /*------------------------------------------------------------*/
289
290 /* This is the Intel register encoding -- integer regs. */
291 #define R_EAX 0
292 #define R_ECX 1
293 #define R_EDX 2
294 #define R_EBX 3
295 #define R_ESP 4
296 #define R_EBP 5
297 #define R_ESI 6
298 #define R_EDI 7
299
300 #define R_AL (0+R_EAX)
301 #define R_AH (4+R_EAX)
302
303 /* This is the Intel register encoding -- segment regs. */
304 #define R_ES 0
305 #define R_CS 1
306 #define R_SS 2
307 #define R_DS 3
308 #define R_FS 4
309 #define R_GS 5
310
311
312 /* Add a statement to the list held by "irbb". */
stmt(IRStmt * st)313 static void stmt ( IRStmt* st )
314 {
315 addStmtToIRSB( irsb, st );
316 }
317
318 /* Generate a new temporary of the given type. */
newTemp(IRType ty)319 static IRTemp newTemp ( IRType ty )
320 {
321 vassert(isPlausibleIRType(ty));
322 return newIRTemp( irsb->tyenv, ty );
323 }
324
325 /* Various simple conversions */
326
extend_s_8to32(UInt x)327 static UInt extend_s_8to32( UInt x )
328 {
329 return (UInt)((((Int)x) << 24) >> 24);
330 }
331
extend_s_16to32(UInt x)332 static UInt extend_s_16to32 ( UInt x )
333 {
334 return (UInt)((((Int)x) << 16) >> 16);
335 }
336
337 /* Fetch a byte from the guest insn stream. */
getIByte(Int delta)338 static UChar getIByte ( Int delta )
339 {
340 return guest_code[delta];
341 }
342
343 /* Extract the reg field from a modRM byte. */
gregOfRM(UChar mod_reg_rm)344 static Int gregOfRM ( UChar mod_reg_rm )
345 {
346 return (Int)( (mod_reg_rm >> 3) & 7 );
347 }
348
349 /* Figure out whether the mod and rm parts of a modRM byte refer to a
350 register or memory. If so, the byte will have the form 11XXXYYY,
351 where YYY is the register number. */
epartIsReg(UChar mod_reg_rm)352 static Bool epartIsReg ( UChar mod_reg_rm )
353 {
354 return toBool(0xC0 == (mod_reg_rm & 0xC0));
355 }
356
357 /* ... and extract the register number ... */
eregOfRM(UChar mod_reg_rm)358 static Int eregOfRM ( UChar mod_reg_rm )
359 {
360 return (Int)(mod_reg_rm & 0x7);
361 }
362
363 /* Get a 8/16/32-bit unsigned value out of the insn stream. */
364
getUChar(Int delta)365 static UChar getUChar ( Int delta )
366 {
367 UChar v = guest_code[delta+0];
368 return toUChar(v);
369 }
370
getUDisp16(Int delta)371 static UInt getUDisp16 ( Int delta )
372 {
373 UInt v = guest_code[delta+1]; v <<= 8;
374 v |= guest_code[delta+0];
375 return v & 0xFFFF;
376 }
377
getUDisp32(Int delta)378 static UInt getUDisp32 ( Int delta )
379 {
380 UInt v = guest_code[delta+3]; v <<= 8;
381 v |= guest_code[delta+2]; v <<= 8;
382 v |= guest_code[delta+1]; v <<= 8;
383 v |= guest_code[delta+0];
384 return v;
385 }
386
getUDisp(Int size,Int delta)387 static UInt getUDisp ( Int size, Int delta )
388 {
389 switch (size) {
390 case 4: return getUDisp32(delta);
391 case 2: return getUDisp16(delta);
392 case 1: return (UInt)getUChar(delta);
393 default: vpanic("getUDisp(x86)");
394 }
395 return 0; /*notreached*/
396 }
397
398
399 /* Get a byte value out of the insn stream and sign-extend to 32
400 bits. */
getSDisp8(Int delta)401 static UInt getSDisp8 ( Int delta )
402 {
403 return extend_s_8to32( (UInt) (guest_code[delta]) );
404 }
405
getSDisp16(Int delta0)406 static UInt getSDisp16 ( Int delta0 )
407 {
408 UChar* eip = (UChar*)(&guest_code[delta0]);
409 UInt d = *eip++;
410 d |= ((*eip++) << 8);
411 return extend_s_16to32(d);
412 }
413
getSDisp(Int size,Int delta)414 static UInt getSDisp ( Int size, Int delta )
415 {
416 switch (size) {
417 case 4: return getUDisp32(delta);
418 case 2: return getSDisp16(delta);
419 case 1: return getSDisp8(delta);
420 default: vpanic("getSDisp(x86)");
421 }
422 return 0; /*notreached*/
423 }
424
425
426 /*------------------------------------------------------------*/
427 /*--- Helpers for constructing IR. ---*/
428 /*------------------------------------------------------------*/
429
430 /* Create a 1/2/4 byte read of an x86 integer registers. For 16/8 bit
431 register references, we need to take the host endianness into
432 account. Supplied value is 0 .. 7 and in the Intel instruction
433 encoding. */
434
szToITy(Int n)435 static IRType szToITy ( Int n )
436 {
437 switch (n) {
438 case 1: return Ity_I8;
439 case 2: return Ity_I16;
440 case 4: return Ity_I32;
441 default: vpanic("szToITy(x86)");
442 }
443 }
444
/* On a little-endian host, less significant bits of the guest
   registers are at lower addresses.  Therefore, a reference to a
   register low half has the same guest state offset as a reference to
   the full register.
*/
integerGuestRegOffset(Int sz,UInt archreg)450 static Int integerGuestRegOffset ( Int sz, UInt archreg )
451 {
452 vassert(archreg < 8);
453
454 /* Correct for little-endian host only. */
455 vassert(!host_is_bigendian);
456
457 if (sz == 4 || sz == 2 || (sz == 1 && archreg < 4)) {
458 switch (archreg) {
459 case R_EAX: return OFFB_EAX;
460 case R_EBX: return OFFB_EBX;
461 case R_ECX: return OFFB_ECX;
462 case R_EDX: return OFFB_EDX;
463 case R_ESI: return OFFB_ESI;
464 case R_EDI: return OFFB_EDI;
465 case R_ESP: return OFFB_ESP;
466 case R_EBP: return OFFB_EBP;
467 default: vpanic("integerGuestRegOffset(x86,le)(4,2)");
468 }
469 }
470
471 vassert(archreg >= 4 && archreg < 8 && sz == 1);
472 switch (archreg-4) {
473 case R_EAX: return 1+ OFFB_EAX;
474 case R_EBX: return 1+ OFFB_EBX;
475 case R_ECX: return 1+ OFFB_ECX;
476 case R_EDX: return 1+ OFFB_EDX;
477 default: vpanic("integerGuestRegOffset(x86,le)(1h)");
478 }
479
480 /* NOTREACHED */
481 vpanic("integerGuestRegOffset(x86,le)");
482 }
483
segmentGuestRegOffset(UInt sreg)484 static Int segmentGuestRegOffset ( UInt sreg )
485 {
486 switch (sreg) {
487 case R_ES: return OFFB_ES;
488 case R_CS: return OFFB_CS;
489 case R_SS: return OFFB_SS;
490 case R_DS: return OFFB_DS;
491 case R_FS: return OFFB_FS;
492 case R_GS: return OFFB_GS;
493 default: vpanic("segmentGuestRegOffset(x86)");
494 }
495 }
496
xmmGuestRegOffset(UInt xmmreg)497 static Int xmmGuestRegOffset ( UInt xmmreg )
498 {
499 switch (xmmreg) {
500 case 0: return OFFB_XMM0;
501 case 1: return OFFB_XMM1;
502 case 2: return OFFB_XMM2;
503 case 3: return OFFB_XMM3;
504 case 4: return OFFB_XMM4;
505 case 5: return OFFB_XMM5;
506 case 6: return OFFB_XMM6;
507 case 7: return OFFB_XMM7;
508 default: vpanic("xmmGuestRegOffset");
509 }
510 }
511
512 /* Lanes of vector registers are always numbered from zero being the
513 least significant lane (rightmost in the register). */
514
xmmGuestRegLane16offset(UInt xmmreg,Int laneno)515 static Int xmmGuestRegLane16offset ( UInt xmmreg, Int laneno )
516 {
517 /* Correct for little-endian host only. */
518 vassert(!host_is_bigendian);
519 vassert(laneno >= 0 && laneno < 8);
520 return xmmGuestRegOffset( xmmreg ) + 2 * laneno;
521 }
522
xmmGuestRegLane32offset(UInt xmmreg,Int laneno)523 static Int xmmGuestRegLane32offset ( UInt xmmreg, Int laneno )
524 {
525 /* Correct for little-endian host only. */
526 vassert(!host_is_bigendian);
527 vassert(laneno >= 0 && laneno < 4);
528 return xmmGuestRegOffset( xmmreg ) + 4 * laneno;
529 }
530
xmmGuestRegLane64offset(UInt xmmreg,Int laneno)531 static Int xmmGuestRegLane64offset ( UInt xmmreg, Int laneno )
532 {
533 /* Correct for little-endian host only. */
534 vassert(!host_is_bigendian);
535 vassert(laneno >= 0 && laneno < 2);
536 return xmmGuestRegOffset( xmmreg ) + 8 * laneno;
537 }
538
getIReg(Int sz,UInt archreg)539 static IRExpr* getIReg ( Int sz, UInt archreg )
540 {
541 vassert(sz == 1 || sz == 2 || sz == 4);
542 vassert(archreg < 8);
543 return IRExpr_Get( integerGuestRegOffset(sz,archreg),
544 szToITy(sz) );
545 }
546
547 /* Ditto, but write to a reg instead. */
putIReg(Int sz,UInt archreg,IRExpr * e)548 static void putIReg ( Int sz, UInt archreg, IRExpr* e )
549 {
550 IRType ty = typeOfIRExpr(irsb->tyenv, e);
551 switch (sz) {
552 case 1: vassert(ty == Ity_I8); break;
553 case 2: vassert(ty == Ity_I16); break;
554 case 4: vassert(ty == Ity_I32); break;
555 default: vpanic("putIReg(x86)");
556 }
557 vassert(archreg < 8);
558 stmt( IRStmt_Put(integerGuestRegOffset(sz,archreg), e) );
559 }
560
getSReg(UInt sreg)561 static IRExpr* getSReg ( UInt sreg )
562 {
563 return IRExpr_Get( segmentGuestRegOffset(sreg), Ity_I16 );
564 }
565
putSReg(UInt sreg,IRExpr * e)566 static void putSReg ( UInt sreg, IRExpr* e )
567 {
568 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I16);
569 stmt( IRStmt_Put( segmentGuestRegOffset(sreg), e ) );
570 }
571
getXMMReg(UInt xmmreg)572 static IRExpr* getXMMReg ( UInt xmmreg )
573 {
574 return IRExpr_Get( xmmGuestRegOffset(xmmreg), Ity_V128 );
575 }
576
getXMMRegLane64(UInt xmmreg,Int laneno)577 static IRExpr* getXMMRegLane64 ( UInt xmmreg, Int laneno )
578 {
579 return IRExpr_Get( xmmGuestRegLane64offset(xmmreg,laneno), Ity_I64 );
580 }
581
getXMMRegLane64F(UInt xmmreg,Int laneno)582 static IRExpr* getXMMRegLane64F ( UInt xmmreg, Int laneno )
583 {
584 return IRExpr_Get( xmmGuestRegLane64offset(xmmreg,laneno), Ity_F64 );
585 }
586
getXMMRegLane32(UInt xmmreg,Int laneno)587 static IRExpr* getXMMRegLane32 ( UInt xmmreg, Int laneno )
588 {
589 return IRExpr_Get( xmmGuestRegLane32offset(xmmreg,laneno), Ity_I32 );
590 }
591
getXMMRegLane32F(UInt xmmreg,Int laneno)592 static IRExpr* getXMMRegLane32F ( UInt xmmreg, Int laneno )
593 {
594 return IRExpr_Get( xmmGuestRegLane32offset(xmmreg,laneno), Ity_F32 );
595 }
596
putXMMReg(UInt xmmreg,IRExpr * e)597 static void putXMMReg ( UInt xmmreg, IRExpr* e )
598 {
599 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_V128);
600 stmt( IRStmt_Put( xmmGuestRegOffset(xmmreg), e ) );
601 }
602
putXMMRegLane64(UInt xmmreg,Int laneno,IRExpr * e)603 static void putXMMRegLane64 ( UInt xmmreg, Int laneno, IRExpr* e )
604 {
605 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I64);
606 stmt( IRStmt_Put( xmmGuestRegLane64offset(xmmreg,laneno), e ) );
607 }
608
putXMMRegLane64F(UInt xmmreg,Int laneno,IRExpr * e)609 static void putXMMRegLane64F ( UInt xmmreg, Int laneno, IRExpr* e )
610 {
611 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_F64);
612 stmt( IRStmt_Put( xmmGuestRegLane64offset(xmmreg,laneno), e ) );
613 }
614
putXMMRegLane32F(UInt xmmreg,Int laneno,IRExpr * e)615 static void putXMMRegLane32F ( UInt xmmreg, Int laneno, IRExpr* e )
616 {
617 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_F32);
618 stmt( IRStmt_Put( xmmGuestRegLane32offset(xmmreg,laneno), e ) );
619 }
620
putXMMRegLane32(UInt xmmreg,Int laneno,IRExpr * e)621 static void putXMMRegLane32 ( UInt xmmreg, Int laneno, IRExpr* e )
622 {
623 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I32);
624 stmt( IRStmt_Put( xmmGuestRegLane32offset(xmmreg,laneno), e ) );
625 }
626
putXMMRegLane16(UInt xmmreg,Int laneno,IRExpr * e)627 static void putXMMRegLane16 ( UInt xmmreg, Int laneno, IRExpr* e )
628 {
629 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I16);
630 stmt( IRStmt_Put( xmmGuestRegLane16offset(xmmreg,laneno), e ) );
631 }
632
assign(IRTemp dst,IRExpr * e)633 static void assign ( IRTemp dst, IRExpr* e )
634 {
635 stmt( IRStmt_WrTmp(dst, e) );
636 }
637
storeLE(IRExpr * addr,IRExpr * data)638 static void storeLE ( IRExpr* addr, IRExpr* data )
639 {
640 stmt( IRStmt_Store(Iend_LE, addr, data) );
641 }
642
unop(IROp op,IRExpr * a)643 static IRExpr* unop ( IROp op, IRExpr* a )
644 {
645 return IRExpr_Unop(op, a);
646 }
647
binop(IROp op,IRExpr * a1,IRExpr * a2)648 static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
649 {
650 return IRExpr_Binop(op, a1, a2);
651 }
652
triop(IROp op,IRExpr * a1,IRExpr * a2,IRExpr * a3)653 static IRExpr* triop ( IROp op, IRExpr* a1, IRExpr* a2, IRExpr* a3 )
654 {
655 return IRExpr_Triop(op, a1, a2, a3);
656 }
657
mkexpr(IRTemp tmp)658 static IRExpr* mkexpr ( IRTemp tmp )
659 {
660 return IRExpr_RdTmp(tmp);
661 }
662
mkU8(UInt i)663 static IRExpr* mkU8 ( UInt i )
664 {
665 vassert(i < 256);
666 return IRExpr_Const(IRConst_U8( (UChar)i ));
667 }
668
mkU16(UInt i)669 static IRExpr* mkU16 ( UInt i )
670 {
671 vassert(i < 65536);
672 return IRExpr_Const(IRConst_U16( (UShort)i ));
673 }
674
mkU32(UInt i)675 static IRExpr* mkU32 ( UInt i )
676 {
677 return IRExpr_Const(IRConst_U32(i));
678 }
679
mkU64(ULong i)680 static IRExpr* mkU64 ( ULong i )
681 {
682 return IRExpr_Const(IRConst_U64(i));
683 }
684
mkU(IRType ty,UInt i)685 static IRExpr* mkU ( IRType ty, UInt i )
686 {
687 if (ty == Ity_I8) return mkU8(i);
688 if (ty == Ity_I16) return mkU16(i);
689 if (ty == Ity_I32) return mkU32(i);
690 /* If this panics, it usually means you passed a size (1,2,4)
691 value as the IRType, rather than a real IRType. */
692 vpanic("mkU(x86)");
693 }
694
mkV128(UShort mask)695 static IRExpr* mkV128 ( UShort mask )
696 {
697 return IRExpr_Const(IRConst_V128(mask));
698 }
699
loadLE(IRType ty,IRExpr * addr)700 static IRExpr* loadLE ( IRType ty, IRExpr* addr )
701 {
702 return IRExpr_Load(Iend_LE, ty, addr);
703 }
704
mkSizedOp(IRType ty,IROp op8)705 static IROp mkSizedOp ( IRType ty, IROp op8 )
706 {
707 Int adj;
708 vassert(ty == Ity_I8 || ty == Ity_I16 || ty == Ity_I32);
709 vassert(op8 == Iop_Add8 || op8 == Iop_Sub8
710 || op8 == Iop_Mul8
711 || op8 == Iop_Or8 || op8 == Iop_And8 || op8 == Iop_Xor8
712 || op8 == Iop_Shl8 || op8 == Iop_Shr8 || op8 == Iop_Sar8
713 || op8 == Iop_CmpEQ8 || op8 == Iop_CmpNE8
714 || op8 == Iop_CasCmpNE8
715 || op8 == Iop_ExpCmpNE8
716 || op8 == Iop_Not8);
717 adj = ty==Ity_I8 ? 0 : (ty==Ity_I16 ? 1 : 2);
718 return adj + op8;
719 }
720
mkWidenOp(Int szSmall,Int szBig,Bool signd)721 static IROp mkWidenOp ( Int szSmall, Int szBig, Bool signd )
722 {
723 if (szSmall == 1 && szBig == 4) {
724 return signd ? Iop_8Sto32 : Iop_8Uto32;
725 }
726 if (szSmall == 1 && szBig == 2) {
727 return signd ? Iop_8Sto16 : Iop_8Uto16;
728 }
729 if (szSmall == 2 && szBig == 4) {
730 return signd ? Iop_16Sto32 : Iop_16Uto32;
731 }
732 vpanic("mkWidenOp(x86,guest)");
733 }
734
mkAnd1(IRExpr * x,IRExpr * y)735 static IRExpr* mkAnd1 ( IRExpr* x, IRExpr* y )
736 {
737 vassert(typeOfIRExpr(irsb->tyenv,x) == Ity_I1);
738 vassert(typeOfIRExpr(irsb->tyenv,y) == Ity_I1);
739 return unop(Iop_32to1,
740 binop(Iop_And32,
741 unop(Iop_1Uto32,x),
742 unop(Iop_1Uto32,y)));
743 }
744
745 /* Generate a compare-and-swap operation, operating on memory at
746 'addr'. The expected value is 'expVal' and the new value is
747 'newVal'. If the operation fails, then transfer control (with a
748 no-redir jump (XXX no -- see comment at top of this file)) to
749 'restart_point', which is presumably the address of the guest
750 instruction again -- retrying, essentially. */
casLE(IRExpr * addr,IRExpr * expVal,IRExpr * newVal,Addr32 restart_point)751 static void casLE ( IRExpr* addr, IRExpr* expVal, IRExpr* newVal,
752 Addr32 restart_point )
753 {
754 IRCAS* cas;
755 IRType tyE = typeOfIRExpr(irsb->tyenv, expVal);
756 IRType tyN = typeOfIRExpr(irsb->tyenv, newVal);
757 IRTemp oldTmp = newTemp(tyE);
758 IRTemp expTmp = newTemp(tyE);
759 vassert(tyE == tyN);
760 vassert(tyE == Ity_I32 || tyE == Ity_I16 || tyE == Ity_I8);
761 assign(expTmp, expVal);
762 cas = mkIRCAS( IRTemp_INVALID, oldTmp, Iend_LE, addr,
763 NULL, mkexpr(expTmp), NULL, newVal );
764 stmt( IRStmt_CAS(cas) );
765 stmt( IRStmt_Exit(
766 binop( mkSizedOp(tyE,Iop_CasCmpNE8),
767 mkexpr(oldTmp), mkexpr(expTmp) ),
768 Ijk_Boring, /*Ijk_NoRedir*/
769 IRConst_U32( restart_point ),
770 OFFB_EIP
771 ));
772 }
773
774
775 /*------------------------------------------------------------*/
776 /*--- Helpers for %eflags. ---*/
777 /*------------------------------------------------------------*/
778
779 /* -------------- Evaluating the flags-thunk. -------------- */
780
781 /* Build IR to calculate all the eflags from stored
782 CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression ::
783 Ity_I32. */
mk_x86g_calculate_eflags_all(void)784 static IRExpr* mk_x86g_calculate_eflags_all ( void )
785 {
786 IRExpr** args
787 = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP, Ity_I32),
788 IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
789 IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
790 IRExpr_Get(OFFB_CC_NDEP, Ity_I32) );
791 IRExpr* call
792 = mkIRExprCCall(
793 Ity_I32,
794 0/*regparm*/,
795 "x86g_calculate_eflags_all", &x86g_calculate_eflags_all,
796 args
797 );
798 /* Exclude OP and NDEP from definedness checking. We're only
799 interested in DEP1 and DEP2. */
800 call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
801 return call;
802 }
803
804 /* Build IR to calculate some particular condition from stored
805 CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression ::
806 Ity_Bit. */
mk_x86g_calculate_condition(X86Condcode cond)807 static IRExpr* mk_x86g_calculate_condition ( X86Condcode cond )
808 {
809 IRExpr** args
810 = mkIRExprVec_5( mkU32(cond),
811 IRExpr_Get(OFFB_CC_OP, Ity_I32),
812 IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
813 IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
814 IRExpr_Get(OFFB_CC_NDEP, Ity_I32) );
815 IRExpr* call
816 = mkIRExprCCall(
817 Ity_I32,
818 0/*regparm*/,
819 "x86g_calculate_condition", &x86g_calculate_condition,
820 args
821 );
822 /* Exclude the requested condition, OP and NDEP from definedness
823 checking. We're only interested in DEP1 and DEP2. */
824 call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<1) | (1<<4);
825 return unop(Iop_32to1, call);
826 }
827
828 /* Build IR to calculate just the carry flag from stored
829 CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression :: Ity_I32. */
mk_x86g_calculate_eflags_c(void)830 static IRExpr* mk_x86g_calculate_eflags_c ( void )
831 {
832 IRExpr** args
833 = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP, Ity_I32),
834 IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
835 IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
836 IRExpr_Get(OFFB_CC_NDEP, Ity_I32) );
837 IRExpr* call
838 = mkIRExprCCall(
839 Ity_I32,
840 3/*regparm*/,
841 "x86g_calculate_eflags_c", &x86g_calculate_eflags_c,
842 args
843 );
844 /* Exclude OP and NDEP from definedness checking. We're only
845 interested in DEP1 and DEP2. */
846 call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
847 return call;
848 }
849
850
851 /* -------------- Building the flags-thunk. -------------- */
852
853 /* The machinery in this section builds the flag-thunk following a
854 flag-setting operation. Hence the various setFlags_* functions.
855 */
856
isAddSub(IROp op8)857 static Bool isAddSub ( IROp op8 )
858 {
859 return toBool(op8 == Iop_Add8 || op8 == Iop_Sub8);
860 }
861
isLogic(IROp op8)862 static Bool isLogic ( IROp op8 )
863 {
864 return toBool(op8 == Iop_And8 || op8 == Iop_Or8 || op8 == Iop_Xor8);
865 }
866
867 /* U-widen 8/16/32 bit int expr to 32. */
widenUto32(IRExpr * e)868 static IRExpr* widenUto32 ( IRExpr* e )
869 {
870 switch (typeOfIRExpr(irsb->tyenv,e)) {
871 case Ity_I32: return e;
872 case Ity_I16: return unop(Iop_16Uto32,e);
873 case Ity_I8: return unop(Iop_8Uto32,e);
874 default: vpanic("widenUto32");
875 }
876 }
877
878 /* S-widen 8/16/32 bit int expr to 32. */
widenSto32(IRExpr * e)879 static IRExpr* widenSto32 ( IRExpr* e )
880 {
881 switch (typeOfIRExpr(irsb->tyenv,e)) {
882 case Ity_I32: return e;
883 case Ity_I16: return unop(Iop_16Sto32,e);
884 case Ity_I8: return unop(Iop_8Sto32,e);
885 default: vpanic("widenSto32");
886 }
887 }
888
889 /* Narrow 8/16/32 bit int expr to 8/16/32. Clearly only some
890 of these combinations make sense. */
narrowTo(IRType dst_ty,IRExpr * e)891 static IRExpr* narrowTo ( IRType dst_ty, IRExpr* e )
892 {
893 IRType src_ty = typeOfIRExpr(irsb->tyenv,e);
894 if (src_ty == dst_ty)
895 return e;
896 if (src_ty == Ity_I32 && dst_ty == Ity_I16)
897 return unop(Iop_32to16, e);
898 if (src_ty == Ity_I32 && dst_ty == Ity_I8)
899 return unop(Iop_32to8, e);
900
901 vex_printf("\nsrc, dst tys are: ");
902 ppIRType(src_ty);
903 vex_printf(", ");
904 ppIRType(dst_ty);
905 vex_printf("\n");
906 vpanic("narrowTo(x86)");
907 }
908
909
910 /* Set the flags thunk OP, DEP1 and DEP2 fields. The supplied op is
911 auto-sized up to the real op. */
912
static
void setFlags_DEP1_DEP2 ( IROp op8, IRTemp dep1, IRTemp dep2, IRType ty )
{
   /* Size adjustment 0/1/2 added to the B-sized base op code, giving
      the B/W/L variant. */
   Int ccOp = ty==Ity_I8 ? 0 : (ty==Ity_I16 ? 1 : 2);

   vassert(ty == Ity_I8 || ty == Ity_I16 || ty == Ity_I32);

   /* Only ADD and SUB may be routed here; anything else is a caller
      bug. */
   switch (op8) {
      case Iop_Add8: ccOp += X86G_CC_OP_ADDB; break;
      case Iop_Sub8: ccOp += X86G_CC_OP_SUBB; break;
      default: ppIROp(op8);
               vpanic("setFlags_DEP1_DEP2(x86)");
   }
   /* DEP1 and DEP2 are the two operands, zero-widened to 32 bits. */
   stmt( IRStmt_Put( OFFB_CC_OP,   mkU32(ccOp)) );
   stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto32(mkexpr(dep1))) );
   stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto32(mkexpr(dep2))) );
   /* Set NDEP even though it isn't used.  This makes redundant-PUT
      elimination of previous stores to this field work better. */
   stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
}
933
934
935 /* Set the OP and DEP1 fields only, and write zero to DEP2. */
936
static
void setFlags_DEP1 ( IROp op8, IRTemp dep1, IRType ty )
{
   /* Size adjustment 0/1/2 added to the B-sized base op code. */
   Int ccOp = ty==Ity_I8 ? 0 : (ty==Ity_I16 ? 1 : 2);

   vassert(ty == Ity_I8 || ty == Ity_I16 || ty == Ity_I32);

   /* All three logic ops share one thunk operation: the flags depend
      only on the result, which goes in DEP1. */
   switch (op8) {
      case Iop_Or8:
      case Iop_And8:
      case Iop_Xor8: ccOp += X86G_CC_OP_LOGICB; break;
      default: ppIROp(op8);
               vpanic("setFlags_DEP1(x86)");
   }
   stmt( IRStmt_Put( OFFB_CC_OP,   mkU32(ccOp)) );
   stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto32(mkexpr(dep1))) );
   stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0)) );
   /* Set NDEP even though it isn't used.  This makes redundant-PUT
      elimination of previous stores to this field work better. */
   stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
}
958
959
960 /* For shift operations, we put in the result and the undershifted
961 result. Except if the shift amount is zero, the thunk is left
962 unchanged. */
963
static void setFlags_DEP1_DEP2_shift ( IROp op32,
                                       IRTemp res,
                                       IRTemp resUS,
                                       IRType ty,
                                       IRTemp guard )
{
   /* NB: 2/1/0 here, the reverse of the 0/1/2 scheme used by the
      other setFlags_ helpers, because below it is SUBTRACTED from the
      L-sized op code rather than added to the B-sized one. */
   Int ccOp = ty==Ity_I8 ? 2 : (ty==Ity_I16 ? 1 : 0);

   vassert(ty == Ity_I8 || ty == Ity_I16 || ty == Ity_I32);
   vassert(guard);

   /* Both kinds of right shifts are handled by the same thunk
      operation. */
   switch (op32) {
      case Iop_Shr32:
      case Iop_Sar32: ccOp = X86G_CC_OP_SHRL - ccOp; break;
      case Iop_Shl32: ccOp = X86G_CC_OP_SHLL - ccOp; break;
      default: ppIROp(op32);
               vpanic("setFlags_DEP1_DEP2_shift(x86)");
   }

   /* guard :: Ity_I8.  We need to convert it to I1. */
   IRTemp guardB = newTemp(Ity_I1);
   assign( guardB, binop(Iop_CmpNE8, mkexpr(guard), mkU8(0)) );

   /* DEP1 contains the result, DEP2 contains the undershifted value.
      Every field is written conditionally: if the shift amount turned
      out to be zero at run time, each ITE keeps the old thunk value,
      leaving the flags unchanged as the architecture requires. */
   stmt( IRStmt_Put( OFFB_CC_OP,
                     IRExpr_ITE( mkexpr(guardB),
                                 mkU32(ccOp),
                                 IRExpr_Get(OFFB_CC_OP,Ity_I32) ) ));
   stmt( IRStmt_Put( OFFB_CC_DEP1,
                     IRExpr_ITE( mkexpr(guardB),
                                 widenUto32(mkexpr(res)),
                                 IRExpr_Get(OFFB_CC_DEP1,Ity_I32) ) ));
   stmt( IRStmt_Put( OFFB_CC_DEP2,
                     IRExpr_ITE( mkexpr(guardB),
                                 widenUto32(mkexpr(resUS)),
                                 IRExpr_Get(OFFB_CC_DEP2,Ity_I32) ) ));
   /* Set NDEP even though it isn't used.  This makes redundant-PUT
      elimination of previous stores to this field work better. */
   stmt( IRStmt_Put( OFFB_CC_NDEP,
                     IRExpr_ITE( mkexpr(guardB),
                                 mkU32(0),
                                 IRExpr_Get(OFFB_CC_NDEP,Ity_I32) ) ));
}
1009
1010
1011 /* For the inc/dec case, we store in DEP1 the result value and in NDEP
1012 the former value of the carry flag, which unfortunately we have to
1013 compute. */
1014
static void setFlags_INC_DEC ( Bool inc, IRTemp res, IRType ty )
{
   Int ccOp = inc ? X86G_CC_OP_INCB : X86G_CC_OP_DECB;

   /* Size adjustment 0/1/2 steps the B-sized base op up to B/W/L. */
   ccOp += ty==Ity_I8 ? 0 : (ty==Ity_I16 ? 1 : 2);
   vassert(ty == Ity_I8 || ty == Ity_I16 || ty == Ity_I32);

   /* This has to come first, because calculating the C flag
      may require reading all four thunk fields.  (INC/DEC preserve
      the carry flag, so its pre-instruction value is parked in
      NDEP.) */
   stmt( IRStmt_Put( OFFB_CC_NDEP, mk_x86g_calculate_eflags_c()) );
   stmt( IRStmt_Put( OFFB_CC_OP,   mkU32(ccOp)) );
   stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto32(mkexpr(res))) );
   stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0)) );
}
1029
1030
1031 /* Multiplies are pretty much like add and sub: DEP1 and DEP2 hold the
1032 two arguments. */
1033
1034 static
setFlags_MUL(IRType ty,IRTemp arg1,IRTemp arg2,UInt base_op)1035 void setFlags_MUL ( IRType ty, IRTemp arg1, IRTemp arg2, UInt base_op )
1036 {
1037 switch (ty) {
1038 case Ity_I8:
1039 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(base_op+0) ) );
1040 break;
1041 case Ity_I16:
1042 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(base_op+1) ) );
1043 break;
1044 case Ity_I32:
1045 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(base_op+2) ) );
1046 break;
1047 default:
1048 vpanic("setFlags_MUL(x86)");
1049 }
1050 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto32(mkexpr(arg1)) ));
1051 stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto32(mkexpr(arg2)) ));
1052 /* Set NDEP even though it isn't used. This makes redundant-PUT
1053 elimination of previous stores to this field work better. */
1054 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
1055 }
1056
1057
1058 /* -------------- Condition codes. -------------- */
1059
1060 /* Condition codes, using the Intel encoding. */
1061
name_X86Condcode(X86Condcode cond)1062 static const HChar* name_X86Condcode ( X86Condcode cond )
1063 {
1064 switch (cond) {
1065 case X86CondO: return "o";
1066 case X86CondNO: return "no";
1067 case X86CondB: return "b";
1068 case X86CondNB: return "nb";
1069 case X86CondZ: return "z";
1070 case X86CondNZ: return "nz";
1071 case X86CondBE: return "be";
1072 case X86CondNBE: return "nbe";
1073 case X86CondS: return "s";
1074 case X86CondNS: return "ns";
1075 case X86CondP: return "p";
1076 case X86CondNP: return "np";
1077 case X86CondL: return "l";
1078 case X86CondNL: return "nl";
1079 case X86CondLE: return "le";
1080 case X86CondNLE: return "nle";
1081 case X86CondAlways: return "ALWAYS";
1082 default: vpanic("name_X86Condcode");
1083 }
1084 }
1085
1086 static
positiveIse_X86Condcode(X86Condcode cond,Bool * needInvert)1087 X86Condcode positiveIse_X86Condcode ( X86Condcode cond,
1088 Bool* needInvert )
1089 {
1090 vassert(cond >= X86CondO && cond <= X86CondNLE);
1091 if (cond & 1) {
1092 *needInvert = True;
1093 return cond-1;
1094 } else {
1095 *needInvert = False;
1096 return cond;
1097 }
1098 }
1099
1100
1101 /* -------------- Helpers for ADD/SUB with carry. -------------- */
1102
1103 /* Given ta1, ta2 and tres, compute tres = ADC(ta1,ta2) and set flags
1104 appropriately.
1105
1106 Optionally, generate a store for the 'tres' value. This can either
1107 be a normal store, or it can be a cas-with-possible-failure style
1108 store:
1109
1110 if taddr is IRTemp_INVALID, then no store is generated.
1111
1112 if taddr is not IRTemp_INVALID, then a store (using taddr as
1113 the address) is generated:
1114
1115 if texpVal is IRTemp_INVALID then a normal store is
1116 generated, and restart_point must be zero (it is irrelevant).
1117
1118 if texpVal is not IRTemp_INVALID then a cas-style store is
1119 generated. texpVal is the expected value, restart_point
1120 is the restart point if the store fails, and texpVal must
1121 have the same type as tres.
1122 */
static void helper_ADC ( Int sz,
                         IRTemp tres, IRTemp ta1, IRTemp ta2,
                         /* info about optional store: */
                         IRTemp taddr, IRTemp texpVal, Addr32 restart_point )
{
   UInt thunkOp;
   IRType ty    = szToITy(sz);
   IRTemp oldc  = newTemp(Ity_I32);
   IRTemp oldcn = newTemp(ty);
   IROp plus    = mkSizedOp(ty, Iop_Add8);
   IROp xor     = mkSizedOp(ty, Iop_Xor8);

   vassert(typeOfIRTemp(irsb->tyenv, tres) == ty);
   vassert(sz == 1 || sz == 2 || sz == 4);
   thunkOp = sz==4 ? X86G_CC_OP_ADCL
                   : (sz==2 ? X86G_CC_OP_ADCW : X86G_CC_OP_ADCB);

   /* oldc = old carry flag, 0 or 1 */
   assign( oldc, binop(Iop_And32,
                       mk_x86g_calculate_eflags_c(),
                       mkU32(1)) );

   /* Same carry bit, narrowed to the operand size for the addition. */
   assign( oldcn, narrowTo(ty, mkexpr(oldc)) );

   /* tres = ta1 + ta2 + old_carry */
   assign( tres, binop(plus,
                       binop(plus,mkexpr(ta1),mkexpr(ta2)),
                       mkexpr(oldcn)) );

   /* Possibly generate a store of 'tres' to 'taddr'.  See comment at
      start of this function. */
   if (taddr != IRTemp_INVALID) {
      if (texpVal == IRTemp_INVALID) {
         vassert(restart_point == 0);
         storeLE( mkexpr(taddr), mkexpr(tres) );
      } else {
         vassert(typeOfIRTemp(irsb->tyenv, texpVal) == ty);
         /* .. and hence 'texpVal' has the same type as 'tres'. */
         casLE( mkexpr(taddr),
                mkexpr(texpVal), mkexpr(tres), restart_point );
      }
   }

   /* Thunk layout for ADC: DEP1 = first arg, DEP2 = second arg XORed
      with the old carry, NDEP = the old carry itself. */
   stmt( IRStmt_Put( OFFB_CC_OP,  mkU32(thunkOp) ) );
   stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto32(mkexpr(ta1)) ));
   stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto32(binop(xor, mkexpr(ta2),
                                                        mkexpr(oldcn)) )) );
   stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(oldc) ) );
}
1171
1172
1173 /* Given ta1, ta2 and tres, compute tres = SBB(ta1,ta2) and set flags
1174 appropriately. As with helper_ADC, possibly generate a store of
1175 the result -- see comments on helper_ADC for details.
1176 */
static void helper_SBB ( Int sz,
                         IRTemp tres, IRTemp ta1, IRTemp ta2,
                         /* info about optional store: */
                         IRTemp taddr, IRTemp texpVal, Addr32 restart_point )
{
   UInt thunkOp;
   IRType ty    = szToITy(sz);
   IRTemp oldc  = newTemp(Ity_I32);
   IRTemp oldcn = newTemp(ty);
   IROp minus   = mkSizedOp(ty, Iop_Sub8);
   IROp xor     = mkSizedOp(ty, Iop_Xor8);

   vassert(typeOfIRTemp(irsb->tyenv, tres) == ty);
   vassert(sz == 1 || sz == 2 || sz == 4);
   thunkOp = sz==4 ? X86G_CC_OP_SBBL
                   : (sz==2 ? X86G_CC_OP_SBBW : X86G_CC_OP_SBBB);

   /* oldc = old carry flag, 0 or 1 */
   assign( oldc, binop(Iop_And32,
                       mk_x86g_calculate_eflags_c(),
                       mkU32(1)) );

   /* Same borrow bit, narrowed to the operand size. */
   assign( oldcn, narrowTo(ty, mkexpr(oldc)) );

   /* tres = ta1 - ta2 - old_carry */
   assign( tres, binop(minus,
                       binop(minus,mkexpr(ta1),mkexpr(ta2)),
                       mkexpr(oldcn)) );

   /* Possibly generate a store of 'tres' to 'taddr'.  See comment at
      start of this function. */
   if (taddr != IRTemp_INVALID) {
      if (texpVal == IRTemp_INVALID) {
         vassert(restart_point == 0);
         storeLE( mkexpr(taddr), mkexpr(tres) );
      } else {
         vassert(typeOfIRTemp(irsb->tyenv, texpVal) == ty);
         /* .. and hence 'texpVal' has the same type as 'tres'. */
         casLE( mkexpr(taddr),
                mkexpr(texpVal), mkexpr(tres), restart_point );
      }
   }

   /* Thunk layout for SBB mirrors ADC: DEP1 = first arg, DEP2 =
      second arg XORed with the old carry, NDEP = the old carry. */
   stmt( IRStmt_Put( OFFB_CC_OP,  mkU32(thunkOp) ) );
   stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto32(mkexpr(ta1) )) );
   stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto32(binop(xor, mkexpr(ta2),
                                                        mkexpr(oldcn)) )) );
   stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(oldc) ) );
}
1225
1226
1227 /* -------------- Helpers for disassembly printing. -------------- */
1228
nameGrp1(Int opc_aux)1229 static const HChar* nameGrp1 ( Int opc_aux )
1230 {
1231 static const HChar* grp1_names[8]
1232 = { "add", "or", "adc", "sbb", "and", "sub", "xor", "cmp" };
1233 if (opc_aux < 0 || opc_aux > 7) vpanic("nameGrp1(x86)");
1234 return grp1_names[opc_aux];
1235 }
1236
nameGrp2(Int opc_aux)1237 static const HChar* nameGrp2 ( Int opc_aux )
1238 {
1239 static const HChar* grp2_names[8]
1240 = { "rol", "ror", "rcl", "rcr", "shl", "shr", "shl", "sar" };
1241 if (opc_aux < 0 || opc_aux > 7) vpanic("nameGrp2(x86)");
1242 return grp2_names[opc_aux];
1243 }
1244
nameGrp4(Int opc_aux)1245 static const HChar* nameGrp4 ( Int opc_aux )
1246 {
1247 static const HChar* grp4_names[8]
1248 = { "inc", "dec", "???", "???", "???", "???", "???", "???" };
1249 if (opc_aux < 0 || opc_aux > 1) vpanic("nameGrp4(x86)");
1250 return grp4_names[opc_aux];
1251 }
1252
nameGrp5(Int opc_aux)1253 static const HChar* nameGrp5 ( Int opc_aux )
1254 {
1255 static const HChar* grp5_names[8]
1256 = { "inc", "dec", "call*", "call*", "jmp*", "jmp*", "push", "???" };
1257 if (opc_aux < 0 || opc_aux > 6) vpanic("nameGrp5(x86)");
1258 return grp5_names[opc_aux];
1259 }
1260
nameGrp8(Int opc_aux)1261 static const HChar* nameGrp8 ( Int opc_aux )
1262 {
1263 static const HChar* grp8_names[8]
1264 = { "???", "???", "???", "???", "bt", "bts", "btr", "btc" };
1265 if (opc_aux < 4 || opc_aux > 7) vpanic("nameGrp8(x86)");
1266 return grp8_names[opc_aux];
1267 }
1268
nameIReg(Int size,Int reg)1269 static const HChar* nameIReg ( Int size, Int reg )
1270 {
1271 static const HChar* ireg32_names[8]
1272 = { "%eax", "%ecx", "%edx", "%ebx",
1273 "%esp", "%ebp", "%esi", "%edi" };
1274 static const HChar* ireg16_names[8]
1275 = { "%ax", "%cx", "%dx", "%bx", "%sp", "%bp", "%si", "%di" };
1276 static const HChar* ireg8_names[8]
1277 = { "%al", "%cl", "%dl", "%bl",
1278 "%ah{sp}", "%ch{bp}", "%dh{si}", "%bh{di}" };
1279 if (reg < 0 || reg > 7) goto bad;
1280 switch (size) {
1281 case 4: return ireg32_names[reg];
1282 case 2: return ireg16_names[reg];
1283 case 1: return ireg8_names[reg];
1284 }
1285 bad:
1286 vpanic("nameIReg(X86)");
1287 return NULL; /*notreached*/
1288 }
1289
nameSReg(UInt sreg)1290 static const HChar* nameSReg ( UInt sreg )
1291 {
1292 switch (sreg) {
1293 case R_ES: return "%es";
1294 case R_CS: return "%cs";
1295 case R_SS: return "%ss";
1296 case R_DS: return "%ds";
1297 case R_FS: return "%fs";
1298 case R_GS: return "%gs";
1299 default: vpanic("nameSReg(x86)");
1300 }
1301 }
1302
nameMMXReg(Int mmxreg)1303 static const HChar* nameMMXReg ( Int mmxreg )
1304 {
1305 static const HChar* mmx_names[8]
1306 = { "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7" };
1307 if (mmxreg < 0 || mmxreg > 7) vpanic("nameMMXReg(x86,guest)");
1308 return mmx_names[mmxreg];
1309 }
1310
nameXMMReg(Int xmmreg)1311 static const HChar* nameXMMReg ( Int xmmreg )
1312 {
1313 static const HChar* xmm_names[8]
1314 = { "%xmm0", "%xmm1", "%xmm2", "%xmm3",
1315 "%xmm4", "%xmm5", "%xmm6", "%xmm7" };
1316 if (xmmreg < 0 || xmmreg > 7) vpanic("name_of_xmm_reg");
1317 return xmm_names[xmmreg];
1318 }
1319
nameMMXGran(Int gran)1320 static const HChar* nameMMXGran ( Int gran )
1321 {
1322 switch (gran) {
1323 case 0: return "b";
1324 case 1: return "w";
1325 case 2: return "d";
1326 case 3: return "q";
1327 default: vpanic("nameMMXGran(x86,guest)");
1328 }
1329 }
1330
nameISize(Int size)1331 static HChar nameISize ( Int size )
1332 {
1333 switch (size) {
1334 case 4: return 'l';
1335 case 2: return 'w';
1336 case 1: return 'b';
1337 default: vpanic("nameISize(x86)");
1338 }
1339 }
1340
1341
1342 /*------------------------------------------------------------*/
1343 /*--- JMP helpers ---*/
1344 /*------------------------------------------------------------*/
1345
/* Terminate the block with an unconditional jump to the literal
   address d32, of the given jump kind. */
static void jmp_lit( /*MOD*/DisResult* dres,
                     IRJumpKind kind, Addr32 d32 )
{
   /* Must be the first and only terminator for this instruction. */
   vassert(dres->whatNext    == Dis_Continue);
   vassert(dres->len         == 0);
   vassert(dres->continueAt  == 0);
   vassert(dres->jk_StopHere == Ijk_INVALID);
   stmt( IRStmt_Put( OFFB_EIP, mkU32(d32) ) );
   dres->jk_StopHere = kind;
   dres->whatNext    = Dis_StopHere;
}
1357
/* Terminate the block with an unconditional jump to the address held
   in temporary t, of the given jump kind. */
static void jmp_treg( /*MOD*/DisResult* dres,
                      IRJumpKind kind, IRTemp t )
{
   /* Must be the first and only terminator for this instruction. */
   vassert(dres->whatNext    == Dis_Continue);
   vassert(dres->len         == 0);
   vassert(dres->continueAt  == 0);
   vassert(dres->jk_StopHere == Ijk_INVALID);
   stmt( IRStmt_Put( OFFB_EIP, mkexpr(t) ) );
   dres->jk_StopHere = kind;
   dres->whatNext    = Dis_StopHere;
}
1369
1370 static
jcc_01(DisResult * dres,X86Condcode cond,Addr32 d32_false,Addr32 d32_true)1371 void jcc_01( /*MOD*/DisResult* dres,
1372 X86Condcode cond, Addr32 d32_false, Addr32 d32_true )
1373 {
1374 Bool invert;
1375 X86Condcode condPos;
1376 vassert(dres->whatNext == Dis_Continue);
1377 vassert(dres->len == 0);
1378 vassert(dres->continueAt == 0);
1379 vassert(dres->jk_StopHere == Ijk_INVALID);
1380 dres->whatNext = Dis_StopHere;
1381 dres->jk_StopHere = Ijk_Boring;
1382 condPos = positiveIse_X86Condcode ( cond, &invert );
1383 if (invert) {
1384 stmt( IRStmt_Exit( mk_x86g_calculate_condition(condPos),
1385 Ijk_Boring,
1386 IRConst_U32(d32_false),
1387 OFFB_EIP ) );
1388 stmt( IRStmt_Put( OFFB_EIP, mkU32(d32_true) ) );
1389 } else {
1390 stmt( IRStmt_Exit( mk_x86g_calculate_condition(condPos),
1391 Ijk_Boring,
1392 IRConst_U32(d32_true),
1393 OFFB_EIP ) );
1394 stmt( IRStmt_Put( OFFB_EIP, mkU32(d32_false) ) );
1395 }
1396 }
1397
1398
1399 /*------------------------------------------------------------*/
1400 /*--- Disassembling addressing modes ---*/
1401 /*------------------------------------------------------------*/
1402
1403 static
sorbTxt(UChar sorb)1404 const HChar* sorbTxt ( UChar sorb )
1405 {
1406 switch (sorb) {
1407 case 0: return ""; /* no override */
1408 case 0x3E: return "%ds";
1409 case 0x26: return "%es:";
1410 case 0x64: return "%fs:";
1411 case 0x65: return "%gs:";
1412 default: vpanic("sorbTxt(x86,guest)");
1413 }
1414 }
1415
1416
1417 /* 'virtual' is an IRExpr* holding a virtual address. Convert it to a
1418 linear address by adding any required segment override as indicated
1419 by sorb. */
static
IRExpr* handleSegOverride ( UChar sorb, IRExpr* virtual )
{
   Int    sreg;
   IRType hWordTy;
   IRTemp ldt_ptr, gdt_ptr, seg_selector, r64;

   if (sorb == 0)
      /* the common case - no override */
      return virtual;

   /* Map the prefix byte to the segment register it selects. */
   switch (sorb) {
      case 0x3E: sreg = R_DS; break;
      case 0x26: sreg = R_ES; break;
      case 0x64: sreg = R_FS; break;
      case 0x65: sreg = R_GS; break;
      default: vpanic("handleSegOverride(x86,guest)");
   }

   /* Host word type, for passing the LDT/GDT base pointers. */
   hWordTy = sizeof(HWord)==4 ? Ity_I32 : Ity_I64;

   seg_selector = newTemp(Ity_I32);
   ldt_ptr      = newTemp(hWordTy);
   gdt_ptr      = newTemp(hWordTy);
   r64          = newTemp(Ity_I64);

   assign( seg_selector, unop(Iop_16Uto32, getSReg(sreg)) );
   assign( ldt_ptr, IRExpr_Get( OFFB_LDT, hWordTy ));
   assign( gdt_ptr, IRExpr_Get( OFFB_GDT, hWordTy ));

   /*
   Call this to do the translation and limit checks:
   ULong x86g_use_seg_selector ( HWord ldt, HWord gdt,
                                 UInt seg_selector, UInt virtual_addr )
   */
   assign(
      r64,
      mkIRExprCCall(
         Ity_I64,
         0/*regparms*/,
         "x86g_use_seg_selector",
         &x86g_use_seg_selector,
         mkIRExprVec_4( mkexpr(ldt_ptr), mkexpr(gdt_ptr),
                        mkexpr(seg_selector), virtual)
      )
   );

   /* If the high 32 of the result are non-zero, there was a
      failure in address translation.  In which case, make a
      quick exit.  (The exit restarts at the current instruction,
      with Ijk_MapFail signalling the failure to the scheduler.)
   */
   stmt(
      IRStmt_Exit(
         binop(Iop_CmpNE32, unop(Iop_64HIto32, mkexpr(r64)), mkU32(0)),
         Ijk_MapFail,
         IRConst_U32( guest_EIP_curr_instr ),
         OFFB_EIP
      )
   );

   /* otherwise, here's the translated result. */
   return unop(Iop_64to32, mkexpr(r64));
}
1483
1484
1485 /* Generate IR to calculate an address indicated by a ModRM and
1486 following SIB bytes. The expression, and the number of bytes in
1487 the address mode, are returned. Note that this fn should not be
1488 called if the R/M part of the address denotes a register instead of
1489 memory. If print_codegen is true, text of the addressing mode is
1490 placed in buf.
1491
1492 The computed address is stored in a new tempreg, and the
1493 identity of the tempreg is returned. */
1494
disAMode_copy2tmp(IRExpr * addr32)1495 static IRTemp disAMode_copy2tmp ( IRExpr* addr32 )
1496 {
1497 IRTemp tmp = newTemp(Ity_I32);
1498 assign( tmp, addr32 );
1499 return tmp;
1500 }
1501
static
IRTemp disAMode ( Int* len, UChar sorb, Int delta, HChar* buf )
{
   UChar mod_reg_rm = getIByte(delta);
   delta++;

   buf[0] = (UChar)0;

   /* squeeze out the reg field from mod_reg_rm, since a 256-entry
      jump table seems a bit excessive.
   */
   mod_reg_rm &= 0xC7;                /* is now XX000YYY */
   mod_reg_rm = toUChar(mod_reg_rm | (mod_reg_rm >> 3));
                                      /* is now XX0XXYYY */
   mod_reg_rm &= 0x1F;                /* is now 000XXYYY */
   switch (mod_reg_rm) {

      /* (%eax) .. (%edi), not including (%esp) or (%ebp).
         --> GET %reg, t
      */
      case 0x00: case 0x01: case 0x02: case 0x03:
      /* ! 04 */ /* ! 05 */ case 0x06: case 0x07:
         { UChar rm = mod_reg_rm;
           DIS(buf, "%s(%s)", sorbTxt(sorb), nameIReg(4,rm));
           *len = 1;
           return disAMode_copy2tmp(
                  handleSegOverride(sorb, getIReg(4,rm)));
         }

      /* d8(%eax) ... d8(%edi), not including d8(%esp)
         --> GET %reg, t ; ADDL d8, t
      */
      case 0x08: case 0x09: case 0x0A: case 0x0B:
      /* ! 0C */ case 0x0D: case 0x0E: case 0x0F:
         { UChar rm = toUChar(mod_reg_rm & 7);
           UInt  d  = getSDisp8(delta);
           DIS(buf, "%s%d(%s)", sorbTxt(sorb), (Int)d, nameIReg(4,rm));
           *len = 2;
           return disAMode_copy2tmp(
                  handleSegOverride(sorb,
                     binop(Iop_Add32,getIReg(4,rm),mkU32(d))));
         }

      /* d32(%eax) ... d32(%edi), not including d32(%esp)
         --> GET %reg, t ; ADDL d32, t
      */
      case 0x10: case 0x11: case 0x12: case 0x13:
      /* ! 14 */ case 0x15: case 0x16: case 0x17:
         { UChar rm = toUChar(mod_reg_rm & 7);
           UInt  d  = getUDisp32(delta);
           DIS(buf, "%s0x%x(%s)", sorbTxt(sorb), (Int)d, nameIReg(4,rm));
           *len = 5;
           return disAMode_copy2tmp(
                  handleSegOverride(sorb,
                     binop(Iop_Add32,getIReg(4,rm),mkU32(d))));
         }

      /* a register, %eax .. %edi.  This shouldn't happen. */
      case 0x18: case 0x19: case 0x1A: case 0x1B:
      case 0x1C: case 0x1D: case 0x1E: case 0x1F:
         vpanic("disAMode(x86): not an addr!");

      /* a 32-bit literal address
         --> MOV d32, tmp
      */
      case 0x05:
         { UInt d = getUDisp32(delta);
           *len = 5;
           DIS(buf, "%s(0x%x)", sorbTxt(sorb), d);
           return disAMode_copy2tmp(
                     handleSegOverride(sorb, mkU32(d)));
         }

      case 0x04: {
         /* SIB, with no displacement.  Special cases:
            -- %esp cannot act as an index value.
               If index_r indicates %esp, zero is used for the index.
            -- when mod is zero and base indicates EBP, base is instead
               a 32-bit literal.
            It's all madness, I tell you.  Extract %index, %base and
            scale from the SIB byte.  The value denoted is then:
               | %index == %ESP && %base == %EBP
                  = d32 following SIB byte
               | %index == %ESP && %base != %EBP
                  = %base
               | %index != %ESP && %base == %EBP
                  = d32 following SIB byte + (%index << scale)
               | %index != %ESP && %base != %EBP
                  = %base + (%index << scale)

            What happens to the souls of CPU architects who dream up such
            horrendous schemes, do you suppose?
         */
         UChar sib     = getIByte(delta);
         UChar scale   = toUChar((sib >> 6) & 3);
         UChar index_r = toUChar((sib >> 3) & 7);
         UChar base_r  = toUChar(sib & 7);
         delta++;

         if (index_r != R_ESP && base_r != R_EBP) {
            DIS(buf, "%s(%s,%s,%d)", sorbTxt(sorb),
                      nameIReg(4,base_r), nameIReg(4,index_r), 1<<scale);
            *len = 2;
            return
               disAMode_copy2tmp(
               handleSegOverride(sorb,
                  binop(Iop_Add32,
                        getIReg(4,base_r),
                        binop(Iop_Shl32, getIReg(4,index_r),
                              mkU8(scale)))));
         }

         if (index_r != R_ESP && base_r == R_EBP) {
            UInt d = getUDisp32(delta);
            DIS(buf, "%s0x%x(,%s,%d)", sorbTxt(sorb), d,
                      nameIReg(4,index_r), 1<<scale);
            *len = 6;
            return
               disAMode_copy2tmp(
               handleSegOverride(sorb,
                  binop(Iop_Add32,
                        binop(Iop_Shl32, getIReg(4,index_r), mkU8(scale)),
                        mkU32(d))));
         }

         if (index_r == R_ESP && base_r != R_EBP) {
            /* %esp as index means "no index". */
            DIS(buf, "%s(%s,,)", sorbTxt(sorb), nameIReg(4,base_r));
            *len = 2;
            return disAMode_copy2tmp(
                   handleSegOverride(sorb, getIReg(4,base_r)));
         }

         if (index_r == R_ESP && base_r == R_EBP) {
            /* No index, and the "base" slot encodes a bare d32. */
            UInt d = getUDisp32(delta);
            DIS(buf, "%s0x%x(,,)", sorbTxt(sorb), d);
            *len = 6;
            return disAMode_copy2tmp(
                   handleSegOverride(sorb, mkU32(d)));
         }
         /*NOTREACHED*/
         vassert(0);
      }

      /* SIB, with 8-bit displacement.  Special cases:
         -- %esp cannot act as an index value.
            If index_r indicates %esp, zero is used for the index.
         Denoted value is:
            | %index == %ESP
               = d8 + %base
            | %index != %ESP
               = d8 + %base + (%index << scale)
      */
      case 0x0C: {
         UChar sib     = getIByte(delta);
         UChar scale   = toUChar((sib >> 6) & 3);
         UChar index_r = toUChar((sib >> 3) & 7);
         UChar base_r  = toUChar(sib & 7);
         UInt  d       = getSDisp8(delta+1);

         if (index_r == R_ESP) {
            DIS(buf, "%s%d(%s,,)", sorbTxt(sorb),
                      (Int)d, nameIReg(4,base_r));
            *len = 3;
            return disAMode_copy2tmp(
                   handleSegOverride(sorb,
                      binop(Iop_Add32, getIReg(4,base_r), mkU32(d)) ));
         } else {
            DIS(buf, "%s%d(%s,%s,%d)", sorbTxt(sorb), (Int)d,
                      nameIReg(4,base_r), nameIReg(4,index_r), 1<<scale);
            *len = 3;
            return
                disAMode_copy2tmp(
                handleSegOverride(sorb,
                  binop(Iop_Add32,
                        binop(Iop_Add32,
                              getIReg(4,base_r),
                              binop(Iop_Shl32,
                                    getIReg(4,index_r), mkU8(scale))),
                        mkU32(d))));
         }
         /*NOTREACHED*/
         vassert(0);
      }

      /* SIB, with 32-bit displacement.  Special cases:
         -- %esp cannot act as an index value.
            If index_r indicates %esp, zero is used for the index.
         Denoted value is:
            | %index == %ESP
               = d32 + %base
            | %index != %ESP
               = d32 + %base + (%index << scale)
      */
      case 0x14: {
         UChar sib     = getIByte(delta);
         UChar scale   = toUChar((sib >> 6) & 3);
         UChar index_r = toUChar((sib >> 3) & 7);
         UChar base_r  = toUChar(sib & 7);
         UInt  d       = getUDisp32(delta+1);

         if (index_r == R_ESP) {
            DIS(buf, "%s%d(%s,,)", sorbTxt(sorb),
                      (Int)d, nameIReg(4,base_r));
            *len = 6;
            return disAMode_copy2tmp(
                   handleSegOverride(sorb,
                      binop(Iop_Add32, getIReg(4,base_r), mkU32(d)) ));
         } else {
            DIS(buf, "%s%d(%s,%s,%d)", sorbTxt(sorb), (Int)d,
                      nameIReg(4,base_r), nameIReg(4,index_r), 1<<scale);
            *len = 6;
            return
                disAMode_copy2tmp(
                handleSegOverride(sorb,
                  binop(Iop_Add32,
                        binop(Iop_Add32,
                              getIReg(4,base_r),
                              binop(Iop_Shl32,
                                    getIReg(4,index_r), mkU8(scale))),
                        mkU32(d))));
         }
         /*NOTREACHED*/
         vassert(0);
      }

      default:
         vpanic("disAMode(x86)");
         return 0; /*notreached*/
   }
}
1732
1733
1734 /* Figure out the number of (insn-stream) bytes constituting the amode
1735 beginning at delta. Is useful for getting hold of literals beyond
1736 the end of the amode before it has been disassembled. */
1737
lengthAMode(Int delta)1738 static UInt lengthAMode ( Int delta )
1739 {
1740 UChar mod_reg_rm = getIByte(delta); delta++;
1741
1742 /* squeeze out the reg field from mod_reg_rm, since a 256-entry
1743 jump table seems a bit excessive.
1744 */
1745 mod_reg_rm &= 0xC7; /* is now XX000YYY */
1746 mod_reg_rm = toUChar(mod_reg_rm | (mod_reg_rm >> 3));
1747 /* is now XX0XXYYY */
1748 mod_reg_rm &= 0x1F; /* is now 000XXYYY */
1749 switch (mod_reg_rm) {
1750
1751 /* (%eax) .. (%edi), not including (%esp) or (%ebp). */
1752 case 0x00: case 0x01: case 0x02: case 0x03:
1753 /* ! 04 */ /* ! 05 */ case 0x06: case 0x07:
1754 return 1;
1755
1756 /* d8(%eax) ... d8(%edi), not including d8(%esp). */
1757 case 0x08: case 0x09: case 0x0A: case 0x0B:
1758 /* ! 0C */ case 0x0D: case 0x0E: case 0x0F:
1759 return 2;
1760
1761 /* d32(%eax) ... d32(%edi), not including d32(%esp). */
1762 case 0x10: case 0x11: case 0x12: case 0x13:
1763 /* ! 14 */ case 0x15: case 0x16: case 0x17:
1764 return 5;
1765
1766 /* a register, %eax .. %edi. (Not an addr, but still handled.) */
1767 case 0x18: case 0x19: case 0x1A: case 0x1B:
1768 case 0x1C: case 0x1D: case 0x1E: case 0x1F:
1769 return 1;
1770
1771 /* a 32-bit literal address. */
1772 case 0x05: return 5;
1773
1774 /* SIB, no displacement. */
1775 case 0x04: {
1776 UChar sib = getIByte(delta);
1777 UChar base_r = toUChar(sib & 7);
1778 if (base_r == R_EBP) return 6; else return 2;
1779 }
1780 /* SIB, with 8-bit displacement. */
1781 case 0x0C: return 3;
1782
1783 /* SIB, with 32-bit displacement. */
1784 case 0x14: return 6;
1785
1786 default:
1787 vpanic("lengthAMode");
1788 return 0; /*notreached*/
1789 }
1790 }
1791
1792 /*------------------------------------------------------------*/
1793 /*--- Disassembling common idioms ---*/
1794 /*------------------------------------------------------------*/
1795
1796 /* Handle binary integer instructions of the form
1797 op E, G meaning
1798 op reg-or-mem, reg
1799 Is passed the a ptr to the modRM byte, the actual operation, and the
1800 data size. Returns the address advanced completely over this
1801 instruction.
1802
1803 E(src) is reg-or-mem
1804 G(dst) is reg.
1805
1806 If E is reg, --> GET %G, tmp
1807 OP %E, tmp
1808 PUT tmp, %G
1809
1810 If E is mem and OP is not reversible,
1811 --> (getAddr E) -> tmpa
1812 LD (tmpa), tmpa
1813 GET %G, tmp2
1814 OP tmpa, tmp2
1815 PUT tmp2, %G
1816
1817 If E is mem and OP is reversible
1818 --> (getAddr E) -> tmpa
1819 LD (tmpa), tmpa
1820 OP %G, tmpa
1821 PUT tmpa, %G
1822 */
static
UInt dis_op2_E_G ( UChar       sorb,
                   Bool        addSubCarry,
                   IROp        op8,
                   Bool        keep,
                   Int         size,
                   Int         delta0,
                   const HChar* t_x86opc )
{
   HChar   dis_buf[50];
   Int     len;
   IRType  ty   = szToITy(size);
   IRTemp  dst1 = newTemp(ty);   /* result */
   IRTemp  src  = newTemp(ty);   /* E operand */
   IRTemp  dst0 = newTemp(ty);   /* original G value */
   UChar   rm   = getUChar(delta0);
   IRTemp  addr = IRTemp_INVALID;

   /* addSubCarry == True indicates the intended operation is
      add-with-carry or subtract-with-borrow. */
   if (addSubCarry) {
      vassert(op8 == Iop_Add8 || op8 == Iop_Sub8);
      vassert(keep);
   }

   if (epartIsReg(rm)) {
      /* Specially handle XOR reg,reg, because that doesn't really
         depend on reg, and doing the obvious thing potentially
         generates a spurious value check failure due to the bogus
         dependency.  Ditto SBB reg,reg. */
      if ((op8 == Iop_Xor8 || (op8 == Iop_Sub8 && addSubCarry))
          && gregOfRM(rm) == eregOfRM(rm)) {
         putIReg(size, gregOfRM(rm), mkU(ty,0));
      }
      assign( dst0, getIReg(size,gregOfRM(rm)) );
      assign( src,  getIReg(size,eregOfRM(rm)) );

      if (addSubCarry && op8 == Iop_Add8) {
         /* ADC: helper computes the sum and sets the flag thunk. */
         helper_ADC( size, dst1, dst0, src,
                     /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
         putIReg(size, gregOfRM(rm), mkexpr(dst1));
      } else
      if (addSubCarry && op8 == Iop_Sub8) {
         /* SBB: likewise, via its helper. */
         helper_SBB( size, dst1, dst0, src,
                     /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
         putIReg(size, gregOfRM(rm), mkexpr(dst1));
      } else {
         /* Plain binop; flag thunk depends on whether it is an
            add/sub (needs both operands) or a logic op (result only).
            'keep' is False for CMP/TEST-style ops that set flags but
            discard the result. */
         assign( dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) );
         if (isAddSub(op8))
            setFlags_DEP1_DEP2(op8, dst0, src, ty);
         else
            setFlags_DEP1(op8, dst1, ty);
         if (keep)
            putIReg(size, gregOfRM(rm), mkexpr(dst1));
      }

      DIP("%s%c %s,%s\n", t_x86opc, nameISize(size),
                          nameIReg(size,eregOfRM(rm)),
                          nameIReg(size,gregOfRM(rm)));
      return 1+delta0;
   } else {
      /* E refers to memory */
      addr = disAMode ( &len, sorb, delta0, dis_buf);
      assign( dst0, getIReg(size,gregOfRM(rm)) );
      assign( src,  loadLE(szToITy(size), mkexpr(addr)) );

      if (addSubCarry && op8 == Iop_Add8) {
         helper_ADC( size, dst1, dst0, src,
                     /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
         putIReg(size, gregOfRM(rm), mkexpr(dst1));
      } else
      if (addSubCarry && op8 == Iop_Sub8) {
         helper_SBB( size, dst1, dst0, src,
                     /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
         putIReg(size, gregOfRM(rm), mkexpr(dst1));
      } else {
         assign( dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) );
         if (isAddSub(op8))
            setFlags_DEP1_DEP2(op8, dst0, src, ty);
         else
            setFlags_DEP1(op8, dst1, ty);
         if (keep)
            putIReg(size, gregOfRM(rm), mkexpr(dst1));
      }

      DIP("%s%c %s,%s\n", t_x86opc, nameISize(size),
                          dis_buf,nameIReg(size,gregOfRM(rm)));
      return len+delta0;
   }
}
1913
1914
1915
/* Handle binary integer instructions of the form
      op G, E  meaning
      op reg, reg-or-mem
   Is passed a ptr to the modRM byte, the actual operation, and the
   data size.  Returns the address advanced completely over this
   instruction.

   G(src) is reg.
   E(dst) is reg-or-mem

   If E is reg, -->    GET %E,  tmp
                       OP %G,   tmp
                       PUT tmp, %E

   If E is mem, -->    (getAddr E) -> tmpa
                       LD (tmpa), tmpv
                       OP %G, tmpv
                       ST tmpv, (tmpa)

   'locked' requests an atomic (compare-and-swap style) store for the
   memory-destination forms.  'addSubCarry' selects ADC/SBB rather
   than plain ADD/SUB.  'keep' is False for CMP-style operations,
   which compute flags but discard the result.
*/
static
UInt dis_op2_G_E ( UChar       sorb,
                   Bool        locked,
                   Bool        addSubCarry,
                   IROp        op8,
                   Bool        keep,
                   Int         size,
                   Int         delta0,
                   const HChar* t_x86opc )
{
   HChar   dis_buf[50];
   Int     len;
   IRType  ty   = szToITy(size);
   IRTemp  dst1 = newTemp(ty);   /* the result */
   IRTemp  src  = newTemp(ty);   /* the G (register) operand */
   IRTemp  dst0 = newTemp(ty);   /* the E operand before the op */
   UChar   rm   = getIByte(delta0);
   IRTemp  addr = IRTemp_INVALID;

   /* addSubCarry == True indicates the intended operation is
      add-with-carry or subtract-with-borrow. */
   if (addSubCarry) {
      vassert(op8 == Iop_Add8 || op8 == Iop_Sub8);
      vassert(keep);
   }

   if (epartIsReg(rm)) {
      /* Specially handle XOR reg,reg, because that doesn't really
         depend on reg, and doing the obvious thing potentially
         generates a spurious value check failure due to the bogus
         dependency.  Ditto SBB reg,reg.  Writing a defined zero first
         means the reads below observe a defined value. */
      if ((op8 == Iop_Xor8 || (op8 == Iop_Sub8 && addSubCarry))
          && gregOfRM(rm) == eregOfRM(rm)) {
         putIReg(size, eregOfRM(rm), mkU(ty,0));
      }
      assign(dst0, getIReg(size,eregOfRM(rm)));
      assign(src,  getIReg(size,gregOfRM(rm)));

      if (addSubCarry && op8 == Iop_Add8) {
         /* ADC: helper computes the result and sets the flags thunk. */
         helper_ADC( size, dst1, dst0, src,
                     /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
         putIReg(size, eregOfRM(rm), mkexpr(dst1));
      } else
      if (addSubCarry && op8 == Iop_Sub8) {
         /* SBB: ditto. */
         helper_SBB( size, dst1, dst0, src,
                     /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
         putIReg(size, eregOfRM(rm), mkexpr(dst1));
      } else {
         assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)));
         if (isAddSub(op8))
            setFlags_DEP1_DEP2(op8, dst0, src, ty);
         else
            setFlags_DEP1(op8, dst1, ty);
         if (keep)
            putIReg(size, eregOfRM(rm), mkexpr(dst1));
      }

      DIP("%s%c %s,%s\n", t_x86opc, nameISize(size),
                          nameIReg(size,gregOfRM(rm)),
                          nameIReg(size,eregOfRM(rm)));
      return 1+delta0;
   }

   /* E refers to memory */
   {
      addr = disAMode ( &len, sorb, delta0, dis_buf);
      assign(dst0, loadLE(ty,mkexpr(addr)));
      assign(src,  getIReg(size,gregOfRM(rm)));

      if (addSubCarry && op8 == Iop_Add8) {
         if (locked) {
            /* cas-style store */
            helper_ADC( size, dst1, dst0, src,
                        /*store*/addr, dst0/*expVal*/, guest_EIP_curr_instr );
         } else {
            /* normal store */
            helper_ADC( size, dst1, dst0, src,
                        /*store*/addr, IRTemp_INVALID, 0 );
         }
      } else
      if (addSubCarry && op8 == Iop_Sub8) {
         if (locked) {
            /* cas-style store */
            helper_SBB( size, dst1, dst0, src,
                        /*store*/addr, dst0/*expVal*/, guest_EIP_curr_instr );
         } else {
            /* normal store */
            helper_SBB( size, dst1, dst0, src,
                        /*store*/addr, IRTemp_INVALID, 0 );
         }
      } else {
         assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)));
         if (keep) {
            if (locked) {
               if (0) vex_printf("locked case\n" );
               /* If the CAS fails we side-exit and re-execute the insn,
                  so the flags thunk is only reached on success. */
               casLE( mkexpr(addr),
                      mkexpr(dst0)/*expval*/,
                      mkexpr(dst1)/*newval*/, guest_EIP_curr_instr );
            } else {
               if (0) vex_printf("nonlocked case\n");
               storeLE(mkexpr(addr), mkexpr(dst1));
            }
         }
         if (isAddSub(op8))
            setFlags_DEP1_DEP2(op8, dst0, src, ty);
         else
            setFlags_DEP1(op8, dst1, ty);
      }

      DIP("%s%c %s,%s\n", t_x86opc, nameISize(size),
                          nameIReg(size,gregOfRM(rm)), dis_buf);
      return len+delta0;
   }
}
2049
2050
2051 /* Handle move instructions of the form
2052 mov E, G meaning
2053 mov reg-or-mem, reg
2054 Is passed the a ptr to the modRM byte, and the data size. Returns
2055 the address advanced completely over this instruction.
2056
2057 E(src) is reg-or-mem
2058 G(dst) is reg.
2059
2060 If E is reg, --> GET %E, tmpv
2061 PUT tmpv, %G
2062
2063 If E is mem --> (getAddr E) -> tmpa
2064 LD (tmpa), tmpb
2065 PUT tmpb, %G
2066 */
2067 static
dis_mov_E_G(UChar sorb,Int size,Int delta0)2068 UInt dis_mov_E_G ( UChar sorb,
2069 Int size,
2070 Int delta0 )
2071 {
2072 Int len;
2073 UChar rm = getIByte(delta0);
2074 HChar dis_buf[50];
2075
2076 if (epartIsReg(rm)) {
2077 putIReg(size, gregOfRM(rm), getIReg(size, eregOfRM(rm)));
2078 DIP("mov%c %s,%s\n", nameISize(size),
2079 nameIReg(size,eregOfRM(rm)),
2080 nameIReg(size,gregOfRM(rm)));
2081 return 1+delta0;
2082 }
2083
2084 /* E refers to memory */
2085 {
2086 IRTemp addr = disAMode ( &len, sorb, delta0, dis_buf );
2087 putIReg(size, gregOfRM(rm), loadLE(szToITy(size), mkexpr(addr)));
2088 DIP("mov%c %s,%s\n", nameISize(size),
2089 dis_buf,nameIReg(size,gregOfRM(rm)));
2090 return delta0+len;
2091 }
2092 }
2093
2094
2095 /* Handle move instructions of the form
2096 mov G, E meaning
2097 mov reg, reg-or-mem
2098 Is passed the a ptr to the modRM byte, and the data size. Returns
2099 the address advanced completely over this instruction.
2100
2101 G(src) is reg.
2102 E(dst) is reg-or-mem
2103
2104 If E is reg, --> GET %G, tmp
2105 PUT tmp, %E
2106
2107 If E is mem, --> (getAddr E) -> tmpa
2108 GET %G, tmpv
2109 ST tmpv, (tmpa)
2110 */
2111 static
dis_mov_G_E(UChar sorb,Int size,Int delta0)2112 UInt dis_mov_G_E ( UChar sorb,
2113 Int size,
2114 Int delta0 )
2115 {
2116 Int len;
2117 UChar rm = getIByte(delta0);
2118 HChar dis_buf[50];
2119
2120 if (epartIsReg(rm)) {
2121 putIReg(size, eregOfRM(rm), getIReg(size, gregOfRM(rm)));
2122 DIP("mov%c %s,%s\n", nameISize(size),
2123 nameIReg(size,gregOfRM(rm)),
2124 nameIReg(size,eregOfRM(rm)));
2125 return 1+delta0;
2126 }
2127
2128 /* E refers to memory */
2129 {
2130 IRTemp addr = disAMode ( &len, sorb, delta0, dis_buf);
2131 storeLE( mkexpr(addr), getIReg(size, gregOfRM(rm)) );
2132 DIP("mov%c %s,%s\n", nameISize(size),
2133 nameIReg(size,gregOfRM(rm)), dis_buf);
2134 return len+delta0;
2135 }
2136 }
2137
2138
2139 /* op $immediate, AL/AX/EAX. */
2140 static
dis_op_imm_A(Int size,Bool carrying,IROp op8,Bool keep,Int delta,const HChar * t_x86opc)2141 UInt dis_op_imm_A ( Int size,
2142 Bool carrying,
2143 IROp op8,
2144 Bool keep,
2145 Int delta,
2146 const HChar* t_x86opc )
2147 {
2148 IRType ty = szToITy(size);
2149 IRTemp dst0 = newTemp(ty);
2150 IRTemp src = newTemp(ty);
2151 IRTemp dst1 = newTemp(ty);
2152 UInt lit = getUDisp(size,delta);
2153 assign(dst0, getIReg(size,R_EAX));
2154 assign(src, mkU(ty,lit));
2155
2156 if (isAddSub(op8) && !carrying) {
2157 assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) );
2158 setFlags_DEP1_DEP2(op8, dst0, src, ty);
2159 }
2160 else
2161 if (isLogic(op8)) {
2162 vassert(!carrying);
2163 assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) );
2164 setFlags_DEP1(op8, dst1, ty);
2165 }
2166 else
2167 if (op8 == Iop_Add8 && carrying) {
2168 helper_ADC( size, dst1, dst0, src,
2169 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
2170 }
2171 else
2172 if (op8 == Iop_Sub8 && carrying) {
2173 helper_SBB( size, dst1, dst0, src,
2174 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
2175 }
2176 else
2177 vpanic("dis_op_imm_A(x86,guest)");
2178
2179 if (keep)
2180 putIReg(size, R_EAX, mkexpr(dst1));
2181
2182 DIP("%s%c $0x%x, %s\n", t_x86opc, nameISize(size),
2183 lit, nameIReg(size,R_EAX));
2184 return delta+size;
2185 }
2186
2187
2188 /* Sign- and Zero-extending moves. */
2189 static
dis_movx_E_G(UChar sorb,Int delta,Int szs,Int szd,Bool sign_extend)2190 UInt dis_movx_E_G ( UChar sorb,
2191 Int delta, Int szs, Int szd, Bool sign_extend )
2192 {
2193 UChar rm = getIByte(delta);
2194 if (epartIsReg(rm)) {
2195 if (szd == szs) {
2196 // mutant case. See #250799
2197 putIReg(szd, gregOfRM(rm),
2198 getIReg(szs,eregOfRM(rm)));
2199 } else {
2200 // normal case
2201 putIReg(szd, gregOfRM(rm),
2202 unop(mkWidenOp(szs,szd,sign_extend),
2203 getIReg(szs,eregOfRM(rm))));
2204 }
2205 DIP("mov%c%c%c %s,%s\n", sign_extend ? 's' : 'z',
2206 nameISize(szs), nameISize(szd),
2207 nameIReg(szs,eregOfRM(rm)),
2208 nameIReg(szd,gregOfRM(rm)));
2209 return 1+delta;
2210 }
2211
2212 /* E refers to memory */
2213 {
2214 Int len;
2215 HChar dis_buf[50];
2216 IRTemp addr = disAMode ( &len, sorb, delta, dis_buf );
2217 if (szd == szs) {
2218 // mutant case. See #250799
2219 putIReg(szd, gregOfRM(rm),
2220 loadLE(szToITy(szs),mkexpr(addr)));
2221 } else {
2222 // normal case
2223 putIReg(szd, gregOfRM(rm),
2224 unop(mkWidenOp(szs,szd,sign_extend),
2225 loadLE(szToITy(szs),mkexpr(addr))));
2226 }
2227 DIP("mov%c%c%c %s,%s\n", sign_extend ? 's' : 'z',
2228 nameISize(szs), nameISize(szd),
2229 dis_buf, nameIReg(szd,gregOfRM(rm)));
2230 return len+delta;
2231 }
2232 }
2233
2234
/* Generate code to divide ArchRegs EDX:EAX / DX:AX / AX by the 32 /
   16 / 8 bit quantity in the given IRTemp.  Per x86 DIV/IDIV
   semantics, the quotient lands in EAX/AX/AL and the remainder in
   EDX/DX/AH.  All three widths are implemented with the 64/32 ->
   (32,32) IR divide, widening the narrower operands first. */
static
void codegen_div ( Int sz, IRTemp t, Bool signed_divide )
{
   /* The DivMod ops produce the quotient in the low 32 bits of the
      64-bit result and the remainder in the high 32 bits. */
   IROp   op    = signed_divide ? Iop_DivModS64to32 : Iop_DivModU64to32;
   IRTemp src64 = newTemp(Ity_I64);
   IRTemp dst64 = newTemp(Ity_I64);
   switch (sz) {
      case 4:
         /* EDX:EAX / t(32) */
         assign( src64, binop(Iop_32HLto64,
                              getIReg(4,R_EDX), getIReg(4,R_EAX)) );
         assign( dst64, binop(op, mkexpr(src64), mkexpr(t)) );
         putIReg( 4, R_EAX, unop(Iop_64to32,mkexpr(dst64)) );
         putIReg( 4, R_EDX, unop(Iop_64HIto32,mkexpr(dst64)) );
         break;
      case 2: {
         /* DX:AX / t(16): widen dividend 32->64 and divisor 16->32,
            signedness matching the operation. */
         IROp widen3264 = signed_divide ? Iop_32Sto64 : Iop_32Uto64;
         IROp widen1632 = signed_divide ? Iop_16Sto32 : Iop_16Uto32;
         assign( src64, unop(widen3264,
                             binop(Iop_16HLto32,
                                   getIReg(2,R_EDX), getIReg(2,R_EAX))) );
         assign( dst64, binop(op, mkexpr(src64), unop(widen1632,mkexpr(t))) );
         putIReg( 2, R_EAX, unop(Iop_32to16,unop(Iop_64to32,mkexpr(dst64))) );
         putIReg( 2, R_EDX, unop(Iop_32to16,unop(Iop_64HIto32,mkexpr(dst64))) );
         break;
      }
      case 1: {
         /* AX / t(8): quotient to AL, remainder to AH. */
         IROp widen3264 = signed_divide ? Iop_32Sto64 : Iop_32Uto64;
         IROp widen1632 = signed_divide ? Iop_16Sto32 : Iop_16Uto32;
         IROp widen816  = signed_divide ? Iop_8Sto16  : Iop_8Uto16;
         assign( src64, unop(widen3264, unop(widen1632, getIReg(2,R_EAX))) );
         assign( dst64,
                 binop(op, mkexpr(src64),
                           unop(widen1632, unop(widen816, mkexpr(t)))) );
         putIReg( 1, R_AL, unop(Iop_16to8, unop(Iop_32to16,
                           unop(Iop_64to32,mkexpr(dst64)))) );
         putIReg( 1, R_AH, unop(Iop_16to8, unop(Iop_32to16,
                           unop(Iop_64HIto32,mkexpr(dst64)))) );
         break;
      }
      default: vpanic("codegen_div(x86)");
   }
}
2279
2280
/* Group 1 extended opcodes: op $imm, E -- the immediate-operand forms
   of ADD/OR/ADC/SBB/AND/SUB/XOR/CMP, selected by the reg field of the
   modRM byte.  'd32' is the (possibly sign-extended) immediate;
   'am_sz'/'d_sz' are the sizes of the address-mode bytes and the
   immediate respectively.  Returns delta advanced past the insn. */
static
UInt dis_Grp1 ( UChar sorb, Bool locked,
                Int delta, UChar modrm,
                Int am_sz, Int d_sz, Int sz, UInt d32 )
{
   Int     len;
   HChar   dis_buf[50];
   IRType  ty   = szToITy(sz);
   IRTemp  dst1 = newTemp(ty);   /* the result */
   IRTemp  src  = newTemp(ty);   /* the immediate */
   IRTemp  dst0 = newTemp(ty);   /* the E operand before the op */
   IRTemp  addr = IRTemp_INVALID;
   IROp    op8  = Iop_INVALID;
   UInt    mask = sz==1 ? 0xFF : (sz==2 ? 0xFFFF : 0xFFFFFFFF);

   /* Map the /digit sub-opcode to an 8-bit IROp; /2 (ADC) and /3
      (SBB) go via helpers instead.  /7 is CMP: same op as SUB, but
      the result is discarded (see the gregOfRM < 7 guards below). */
   switch (gregOfRM(modrm)) {
      case 0: op8 = Iop_Add8; break;  case 1: op8 = Iop_Or8;  break;
      case 2: break;  // ADC
      case 3: break;  // SBB
      case 4: op8 = Iop_And8; break;  case 5: op8 = Iop_Sub8; break;
      case 6: op8 = Iop_Xor8; break;  case 7: op8 = Iop_Sub8; break;
      /*NOTREACHED*/
      default: vpanic("dis_Grp1: unhandled case");
   }

   if (epartIsReg(modrm)) {
      vassert(am_sz == 1);

      assign(dst0, getIReg(sz,eregOfRM(modrm)));
      /* Truncate the immediate to the operand size. */
      assign(src,  mkU(ty,d32 & mask));

      if (gregOfRM(modrm) == 2 /* ADC */) {
         helper_ADC( sz, dst1, dst0, src,
                     /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
      } else
      if (gregOfRM(modrm) == 3 /* SBB */) {
         helper_SBB( sz, dst1, dst0, src,
                     /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
      } else {
         assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)));
         if (isAddSub(op8))
            setFlags_DEP1_DEP2(op8, dst0, src, ty);
         else
            setFlags_DEP1(op8, dst1, ty);
      }

      /* Write back except for /7 (CMP), which only sets flags. */
      if (gregOfRM(modrm) < 7)
         putIReg(sz, eregOfRM(modrm), mkexpr(dst1));

      delta += (am_sz + d_sz);
      DIP("%s%c $0x%x, %s\n", nameGrp1(gregOfRM(modrm)), nameISize(sz), d32,
                              nameIReg(sz,eregOfRM(modrm)));
   } else {
      addr = disAMode ( &len, sorb, delta, dis_buf);

      assign(dst0, loadLE(ty,mkexpr(addr)));
      assign(src, mkU(ty,d32 & mask));

      if (gregOfRM(modrm) == 2 /* ADC */) {
         if (locked) {
            /* cas-style store */
            helper_ADC( sz, dst1, dst0, src,
                       /*store*/addr, dst0/*expVal*/, guest_EIP_curr_instr );
         } else {
            /* normal store */
            helper_ADC( sz, dst1, dst0, src,
                        /*store*/addr, IRTemp_INVALID, 0 );
         }
      } else
      if (gregOfRM(modrm) == 3 /* SBB */) {
         if (locked) {
            /* cas-style store */
            helper_SBB( sz, dst1, dst0, src,
                       /*store*/addr, dst0/*expVal*/, guest_EIP_curr_instr );
         } else {
            /* normal store */
            helper_SBB( sz, dst1, dst0, src,
                        /*store*/addr, IRTemp_INVALID, 0 );
         }
      } else {
         assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)));
         /* Store back except for /7 (CMP).  With a LOCK prefix the
            store is a CAS that side-exits on failure, so the flag
            update below is only reached on success. */
         if (gregOfRM(modrm) < 7) {
            if (locked) {
               casLE( mkexpr(addr), mkexpr(dst0)/*expVal*/,
                                    mkexpr(dst1)/*newVal*/,
                                    guest_EIP_curr_instr );
            } else {
               storeLE(mkexpr(addr), mkexpr(dst1));
            }
         }
         if (isAddSub(op8))
            setFlags_DEP1_DEP2(op8, dst0, src, ty);
         else
            setFlags_DEP1(op8, dst1, ty);
      }

      delta += (len+d_sz);
      DIP("%s%c $0x%x, %s\n", nameGrp1(gregOfRM(modrm)), nameISize(sz),
                              d32, dis_buf);
   }
   return delta;
}
2383
2384
/* Group 2 extended opcodes: the rotates and shifts (ROL, ROR, RCL,
   RCR, SHL/SAL, SHR, SAR), selected by the reg field of the modRM
   byte.  shift_expr must be an 8-bit typed expression giving the
   rotate/shift amount; shift_expr_txt, if non-NULL, is its printable
   form for disassembly tracing. */

static
UInt dis_Grp2 ( UChar sorb,
                Int delta, UChar modrm,
                Int am_sz, Int d_sz, Int sz, IRExpr* shift_expr,
                const HChar* shift_expr_txt, Bool* decode_OK )
{
   /* delta on entry points at the modrm byte. */
   HChar  dis_buf[50];
   Int    len;
   Bool   isShift, isRotate, isRotateC;
   IRType ty    = szToITy(sz);
   IRTemp dst0  = newTemp(ty);   /* value before the shift/rotate */
   IRTemp dst1  = newTemp(ty);   /* value afterwards */
   IRTemp addr  = IRTemp_INVALID;

   *decode_OK = True;

   vassert(sz == 1 || sz == 2 || sz == 4);

   /* Put value to shift/rotate in dst0. */
   if (epartIsReg(modrm)) {
      assign(dst0, getIReg(sz, eregOfRM(modrm)));
      delta += (am_sz + d_sz);
   } else {
      addr = disAMode ( &len, sorb, delta, dis_buf);
      assign(dst0, loadLE(ty,mkexpr(addr)));
      delta += len + d_sz;
   }

   /* Classify the sub-opcode: /4../7 are shifts, /0,/1 are ROL/ROR,
      /2,/3 are RCL/RCR (rotate through carry). */
   isShift = False;
   switch (gregOfRM(modrm)) { case 4: case 5: case 6: case 7: isShift = True; }

   isRotate = False;
   switch (gregOfRM(modrm)) { case 0: case 1: isRotate = True; }

   isRotateC = False;
   switch (gregOfRM(modrm)) { case 2: case 3: isRotateC = True; }

   if (!isShift && !isRotate && !isRotateC) {
      /*NOTREACHED*/
      vpanic("dis_Grp2(Reg): unhandled case(x86)");
   }

   if (isRotateC) {
      /* call a helper; these insns are so ridiculous they do not
         deserve better */
      Bool     left = toBool(gregOfRM(modrm) == 2);
      IRTemp   r64  = newTemp(Ity_I64);
      IRExpr** args
         = mkIRExprVec_4( widenUto32(mkexpr(dst0)), /* thing to rotate */
                          widenUto32(shift_expr),   /* rotate amount */
                          widenUto32(mk_x86g_calculate_eflags_all()),
                          mkU32(sz) );
      assign( r64, mkIRExprCCall(
                      Ity_I64,
                      0/*regparm*/,
                      left ? "x86g_calculate_RCL" : "x86g_calculate_RCR",
                      left ? &x86g_calculate_RCL : &x86g_calculate_RCR,
                      args
                   )
            );
      /* new eflags in hi half r64; new value in lo half r64 */
      assign( dst1, narrowTo(ty, unop(Iop_64to32, mkexpr(r64))) );
      stmt( IRStmt_Put( OFFB_CC_OP,   mkU32(X86G_CC_OP_COPY) ));
      stmt( IRStmt_Put( OFFB_CC_DEP1, unop(Iop_64HIto32, mkexpr(r64)) ));
      stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) ));
      /* Set NDEP even though it isn't used.  This makes redundant-PUT
         elimination of previous stores to this field work better. */
      stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
   }

   if (isShift) {

      IRTemp pre32     = newTemp(Ity_I32);  /* operand widened to 32 bits */
      IRTemp res32     = newTemp(Ity_I32);  /* shifted by shift_amt */
      IRTemp res32ss   = newTemp(Ity_I32);  /* shifted by shift_amt-1 (for flags) */
      IRTemp shift_amt = newTemp(Ity_I8);
      IROp   op32;

      switch (gregOfRM(modrm)) {
         case 4: op32 = Iop_Shl32; break;
         case 5: op32 = Iop_Shr32; break;
         case 6: op32 = Iop_Shl32; break;   /* /6 is an alias of SHL */
         case 7: op32 = Iop_Sar32; break;
         /*NOTREACHED*/
         default: vpanic("dis_Grp2:shift"); break;
      }

      /* Widen the value to be shifted to 32 bits, do the shift, and
         narrow back down.  This seems surprisingly long-winded, but
         unfortunately the Intel semantics requires that 8/16-bit
         shifts give defined results for shift values all the way up
         to 31, and this seems the simplest way to do it.  It has the
         advantage that the only IR level shifts generated are of 32
         bit values, and the shift amount is guaranteed to be in the
         range 0 .. 31, thereby observing the IR semantics requiring
         all shift values to be in the range 0 .. 2^word_size-1. */

      /* shift_amt = shift_expr & 31, regardless of operation size */
      assign( shift_amt, binop(Iop_And8, shift_expr, mkU8(31)) );

      /* suitably widen the value to be shifted to 32 bits. */
      assign( pre32, op32==Iop_Sar32 ? widenSto32(mkexpr(dst0))
                                     : widenUto32(mkexpr(dst0)) );

      /* res32 = pre32 `shift` shift_amt */
      assign( res32, binop(op32, mkexpr(pre32), mkexpr(shift_amt)) );

      /* res32ss = pre32 `shift` ((shift_amt - 1) & 31) */
      assign( res32ss,
              binop(op32,
                    mkexpr(pre32),
                    binop(Iop_And8,
                          binop(Iop_Sub8,
                                mkexpr(shift_amt), mkU8(1)),
                          mkU8(31))) );

      /* Build the flags thunk. */
      setFlags_DEP1_DEP2_shift(op32, res32, res32ss, ty, shift_amt);

      /* Narrow the result back down. */
      assign( dst1, narrowTo(ty, mkexpr(res32)) );

   } /* if (isShift) */

   else
   if (isRotate) {
      Int    ccOp      = ty==Ity_I8 ? 0 : (ty==Ity_I16 ? 1 : 2);
      Bool   left      = toBool(gregOfRM(modrm) == 0);
      IRTemp rot_amt   = newTemp(Ity_I8);
      /* NB: despite the name, rot_amt32 is an Ity_I8 temp; the "32"
         refers to the & 31 masking applied below. */
      IRTemp rot_amt32 = newTemp(Ity_I8);
      IRTemp oldFlags  = newTemp(Ity_I32);

      /* rot_amt = shift_expr & mask */
      /* By masking the rotate amount thusly, the IR-level Shl/Shr
         expressions never shift beyond the word size and thus remain
         well defined. */
      assign(rot_amt32, binop(Iop_And8, shift_expr, mkU8(31)));

      if (ty == Ity_I32)
         assign(rot_amt, mkexpr(rot_amt32));
      else
         assign(rot_amt, binop(Iop_And8, mkexpr(rot_amt32), mkU8(8*sz-1)));

      if (left) {

         /* dst1 = (dst0 << rot_amt) | (dst0 >>u (wordsize-rot_amt)) */
         assign(dst1,
            binop( mkSizedOp(ty,Iop_Or8),
                   binop( mkSizedOp(ty,Iop_Shl8),
                          mkexpr(dst0),
                          mkexpr(rot_amt)
                   ),
                   binop( mkSizedOp(ty,Iop_Shr8),
                          mkexpr(dst0),
                          binop(Iop_Sub8,mkU8(8*sz), mkexpr(rot_amt))
                   )
            )
         );
         ccOp += X86G_CC_OP_ROLB;

      } else { /* right */

         /* dst1 = (dst0 >>u rot_amt) | (dst0 << (wordsize-rot_amt)) */
         assign(dst1,
            binop( mkSizedOp(ty,Iop_Or8),
                   binop( mkSizedOp(ty,Iop_Shr8),
                          mkexpr(dst0),
                          mkexpr(rot_amt)
                   ),
                   binop( mkSizedOp(ty,Iop_Shl8),
                          mkexpr(dst0),
                          binop(Iop_Sub8,mkU8(8*sz), mkexpr(rot_amt))
                   )
            )
         );
         ccOp += X86G_CC_OP_RORB;

      }

      /* dst1 now holds the rotated value.  Build flag thunk.  We
         need the resulting value for this, and the previous flags.
         Except don't set it if the rotate count is zero. */

      assign(oldFlags, mk_x86g_calculate_eflags_all());

      /* rot_amt32 :: Ity_I8.  We need to convert it to I1. */
      IRTemp rot_amt32b = newTemp(Ity_I1);
      assign(rot_amt32b, binop(Iop_CmpNE8, mkexpr(rot_amt32), mkU8(0)) );

      /* CC_DEP1 is the rotated value.  CC_NDEP is flags before.
         Each field keeps its old value when the rotate count is 0. */
      stmt( IRStmt_Put( OFFB_CC_OP,
                        IRExpr_ITE( mkexpr(rot_amt32b),
                                    mkU32(ccOp),
                                    IRExpr_Get(OFFB_CC_OP,Ity_I32) ) ));
      stmt( IRStmt_Put( OFFB_CC_DEP1,
                        IRExpr_ITE( mkexpr(rot_amt32b),
                                    widenUto32(mkexpr(dst1)),
                                    IRExpr_Get(OFFB_CC_DEP1,Ity_I32) ) ));
      stmt( IRStmt_Put( OFFB_CC_DEP2,
                        IRExpr_ITE( mkexpr(rot_amt32b),
                                    mkU32(0),
                                    IRExpr_Get(OFFB_CC_DEP2,Ity_I32) ) ));
      stmt( IRStmt_Put( OFFB_CC_NDEP,
                        IRExpr_ITE( mkexpr(rot_amt32b),
                                    mkexpr(oldFlags),
                                    IRExpr_Get(OFFB_CC_NDEP,Ity_I32) ) ));
   } /* if (isRotate) */

   /* Save result, and finish up. */
   if (epartIsReg(modrm)) {
      putIReg(sz, eregOfRM(modrm), mkexpr(dst1));
      if (vex_traceflags & VEX_TRACE_FE) {
         vex_printf("%s%c ",
                    nameGrp2(gregOfRM(modrm)), nameISize(sz) );
         if (shift_expr_txt)
            vex_printf("%s", shift_expr_txt);
         else
            ppIRExpr(shift_expr);
         vex_printf(", %s\n", nameIReg(sz,eregOfRM(modrm)));
      }
   } else {
      storeLE(mkexpr(addr), mkexpr(dst1));
      if (vex_traceflags & VEX_TRACE_FE) {
         vex_printf("%s%c ",
                    nameGrp2(gregOfRM(modrm)), nameISize(sz) );
         if (shift_expr_txt)
            vex_printf("%s", shift_expr_txt);
         else
            ppIRExpr(shift_expr);
         vex_printf(", %s\n", dis_buf);
      }
   }
   return delta;
}
2623
2624
/* Group 8 extended opcodes (but BT/BTS/BTC/BTR only): bit-test
   operations with an immediate bit offset, selected by the reg field
   of the modRM byte.  Sets *decode_OK to False on any encoding this
   function does not handle. */
static
UInt dis_Grp8_Imm ( UChar sorb,
                    Bool locked,
                    Int delta, UChar modrm,
                    Int am_sz, Int sz, UInt src_val,
                    Bool* decode_OK )
{
   /* src_val denotes a d8.
      And delta on entry points at the modrm byte. */

   IRType ty     = szToITy(sz);
   IRTemp t2     = newTemp(Ity_I32);   /* fetched value, widened to 32 bits */
   IRTemp t2m    = newTemp(Ity_I32);   /* modified value (unused for BT) */
   IRTemp t_addr = IRTemp_INVALID;
   HChar  dis_buf[50];
   UInt   mask;

   /* we're optimists :-) */
   *decode_OK = True;

   /* Limit src_val -- the bit offset -- to something within a word.
      The Intel docs say that literal offsets larger than a word are
      masked in this way. */
   switch (sz) {
      case 2:  src_val &= 15; break;
      case 4:  src_val &= 31; break;
      default: *decode_OK = False; return delta;
   }

   /* Invent a mask suitable for the operation. */
   switch (gregOfRM(modrm)) {
      case 4: /* BT */  mask = 0;               break;
      case 5: /* BTS */ mask = 1 << src_val;    break;
      case 6: /* BTR */ mask = ~(1 << src_val); break;
      case 7: /* BTC */ mask = 1 << src_val;    break;
         /* If this needs to be extended, probably simplest to make a
            new function to handle the other cases (0 .. 3).  The
            Intel docs do however not indicate any use for 0 .. 3, so
            we don't expect this to happen. */
      default: *decode_OK = False; return delta;
   }

   /* Fetch the value to be tested and modified into t2, which is
      32-bits wide regardless of sz. */
   if (epartIsReg(modrm)) {
      vassert(am_sz == 1);
      assign( t2, widenUto32(getIReg(sz, eregOfRM(modrm))) );
      delta += (am_sz + 1);   /* +1 for the d8 immediate */
      DIP("%s%c $0x%x, %s\n", nameGrp8(gregOfRM(modrm)), nameISize(sz),
                              src_val, nameIReg(sz,eregOfRM(modrm)));
   } else {
      Int len;
      t_addr = disAMode ( &len, sorb, delta, dis_buf);
      delta  += (len+1);
      assign( t2, widenUto32(loadLE(ty, mkexpr(t_addr))) );
      DIP("%s%c $0x%x, %s\n", nameGrp8(gregOfRM(modrm)), nameISize(sz),
                              src_val, dis_buf);
   }

   /* Compute the new value into t2m, if non-BT. */
   switch (gregOfRM(modrm)) {
      case 4: /* BT */
         break;
      case 5: /* BTS */
         assign( t2m, binop(Iop_Or32, mkU32(mask), mkexpr(t2)) );
         break;
      case 6: /* BTR */
         assign( t2m, binop(Iop_And32, mkU32(mask), mkexpr(t2)) );
         break;
      case 7: /* BTC */
         assign( t2m, binop(Iop_Xor32, mkU32(mask), mkexpr(t2)) );
         break;
      default:
         /*NOTREACHED*/ /*the previous switch guards this*/
         vassert(0);
   }

   /* Write the result back, if non-BT.  If the CAS fails then we
      side-exit from the trace at this point, and so the flag state is
      not affected.  This is of course as required. */
   if (gregOfRM(modrm) != 4 /* BT */) {
      if (epartIsReg(modrm)) {
         putIReg(sz, eregOfRM(modrm), narrowTo(ty, mkexpr(t2m)));
      } else {
         if (locked) {
            casLE( mkexpr(t_addr),
                   narrowTo(ty, mkexpr(t2))/*expd*/,
                   narrowTo(ty, mkexpr(t2m))/*new*/,
                   guest_EIP_curr_instr );
         } else {
            storeLE(mkexpr(t_addr), narrowTo(ty, mkexpr(t2m)));
         }
      }
   }

   /* Copy relevant bit from t2 into the carry flag. */
   /* Flags: C=selected bit, O,S,Z,A,P undefined, so are set to zero. */
   stmt( IRStmt_Put( OFFB_CC_OP,   mkU32(X86G_CC_OP_COPY) ));
   stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) ));
   stmt( IRStmt_Put(
            OFFB_CC_DEP1,
            binop(Iop_And32,
                  binop(Iop_Shr32, mkexpr(t2), mkU8(src_val)),
                  mkU32(1))
       ));
   /* Set NDEP even though it isn't used.  This makes redundant-PUT
      elimination of previous stores to this field work better. */
   stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));

   return delta;
}
2737
2738
2739 /* Signed/unsigned widening multiply. Generate IR to multiply the
2740 value in EAX/AX/AL by the given IRTemp, and park the result in
2741 EDX:EAX/DX:AX/AX.
2742 */
codegen_mulL_A_D(Int sz,Bool syned,IRTemp tmp,const HChar * tmp_txt)2743 static void codegen_mulL_A_D ( Int sz, Bool syned,
2744 IRTemp tmp, const HChar* tmp_txt )
2745 {
2746 IRType ty = szToITy(sz);
2747 IRTemp t1 = newTemp(ty);
2748
2749 assign( t1, getIReg(sz, R_EAX) );
2750
2751 switch (ty) {
2752 case Ity_I32: {
2753 IRTemp res64 = newTemp(Ity_I64);
2754 IRTemp resHi = newTemp(Ity_I32);
2755 IRTemp resLo = newTemp(Ity_I32);
2756 IROp mulOp = syned ? Iop_MullS32 : Iop_MullU32;
2757 UInt tBaseOp = syned ? X86G_CC_OP_SMULB : X86G_CC_OP_UMULB;
2758 setFlags_MUL ( Ity_I32, t1, tmp, tBaseOp );
2759 assign( res64, binop(mulOp, mkexpr(t1), mkexpr(tmp)) );
2760 assign( resHi, unop(Iop_64HIto32,mkexpr(res64)));
2761 assign( resLo, unop(Iop_64to32,mkexpr(res64)));
2762 putIReg(4, R_EDX, mkexpr(resHi));
2763 putIReg(4, R_EAX, mkexpr(resLo));
2764 break;
2765 }
2766 case Ity_I16: {
2767 IRTemp res32 = newTemp(Ity_I32);
2768 IRTemp resHi = newTemp(Ity_I16);
2769 IRTemp resLo = newTemp(Ity_I16);
2770 IROp mulOp = syned ? Iop_MullS16 : Iop_MullU16;
2771 UInt tBaseOp = syned ? X86G_CC_OP_SMULB : X86G_CC_OP_UMULB;
2772 setFlags_MUL ( Ity_I16, t1, tmp, tBaseOp );
2773 assign( res32, binop(mulOp, mkexpr(t1), mkexpr(tmp)) );
2774 assign( resHi, unop(Iop_32HIto16,mkexpr(res32)));
2775 assign( resLo, unop(Iop_32to16,mkexpr(res32)));
2776 putIReg(2, R_EDX, mkexpr(resHi));
2777 putIReg(2, R_EAX, mkexpr(resLo));
2778 break;
2779 }
2780 case Ity_I8: {
2781 IRTemp res16 = newTemp(Ity_I16);
2782 IRTemp resHi = newTemp(Ity_I8);
2783 IRTemp resLo = newTemp(Ity_I8);
2784 IROp mulOp = syned ? Iop_MullS8 : Iop_MullU8;
2785 UInt tBaseOp = syned ? X86G_CC_OP_SMULB : X86G_CC_OP_UMULB;
2786 setFlags_MUL ( Ity_I8, t1, tmp, tBaseOp );
2787 assign( res16, binop(mulOp, mkexpr(t1), mkexpr(tmp)) );
2788 assign( resHi, unop(Iop_16HIto8,mkexpr(res16)));
2789 assign( resLo, unop(Iop_16to8,mkexpr(res16)));
2790 putIReg(2, R_EAX, mkexpr(res16));
2791 break;
2792 }
2793 default:
2794 vpanic("codegen_mulL_A_D(x86)");
2795 }
2796 DIP("%s%c %s\n", syned ? "imul" : "mul", nameISize(sz), tmp_txt);
2797 }
2798
2799
/* Group 3 extended opcodes: TEST/NOT/NEG/MUL/IMUL/DIV/IDIV, selected
   by the reg field of the modRM byte.  Sets *decode_OK to False for
   encodings that are rejected (the undefined /1 case, or a LOCK
   prefix on anything other than NOT/NEG). */
static
UInt dis_Grp3 ( UChar sorb, Bool locked, Int sz, Int delta, Bool* decode_OK )
{
   UInt    d32;
   UChar   modrm;
   HChar   dis_buf[50];
   Int     len;
   IRTemp  addr;
   IRType  ty = szToITy(sz);
   IRTemp  t1 = newTemp(ty);
   IRTemp  dst1, src, dst0;

   *decode_OK = True; /* may change this later */

   modrm = getIByte(delta);

   if (locked && (gregOfRM(modrm) != 2 && gregOfRM(modrm) != 3)) {
      /* LOCK prefix only allowed with not and neg subopcodes */
      *decode_OK = False;
      return delta;
   }

   if (epartIsReg(modrm)) {
      switch (gregOfRM(modrm)) {
         case 0: { /* TEST */
            /* AND with an immediate; flags only, no writeback. */
            delta++; d32 = getUDisp(sz, delta); delta += sz;
            dst1 = newTemp(ty);
            assign(dst1, binop(mkSizedOp(ty,Iop_And8),
                               getIReg(sz,eregOfRM(modrm)),
                               mkU(ty,d32)));
            setFlags_DEP1( Iop_And8, dst1, ty );
            DIP("test%c $0x%x, %s\n", nameISize(sz), d32,
                                      nameIReg(sz, eregOfRM(modrm)));
            break;
         }
         case 1: /* UNDEFINED */
           /* The Intel docs imply this insn is undefined and binutils
              agrees.  Unfortunately Core 2 will run it (with who
              knows what result?)  sandpile.org reckons it's an alias
              for case 0.  We play safe. */
           *decode_OK = False;
           break;
         case 2: /* NOT */
            delta++;
            putIReg(sz, eregOfRM(modrm),
                        unop(mkSizedOp(ty,Iop_Not8),
                             getIReg(sz, eregOfRM(modrm))));
            DIP("not%c %s\n", nameISize(sz), nameIReg(sz, eregOfRM(modrm)));
            break;
         case 3: /* NEG */
            /* Implemented as 0 - reg, so the Sub flags thunk applies. */
            delta++;
            dst0 = newTemp(ty);
            src  = newTemp(ty);
            dst1 = newTemp(ty);
            assign(dst0, mkU(ty,0));
            assign(src,  getIReg(sz,eregOfRM(modrm)));
            assign(dst1, binop(mkSizedOp(ty,Iop_Sub8), mkexpr(dst0), mkexpr(src)));
            setFlags_DEP1_DEP2(Iop_Sub8, dst0, src, ty);
            putIReg(sz, eregOfRM(modrm), mkexpr(dst1));
            DIP("neg%c %s\n", nameISize(sz), nameIReg(sz, eregOfRM(modrm)));
            break;
         case 4: /* MUL (unsigned widening) */
            delta++;
            src = newTemp(ty);
            assign(src, getIReg(sz,eregOfRM(modrm)));
            codegen_mulL_A_D ( sz, False, src, nameIReg(sz,eregOfRM(modrm)) );
            break;
         case 5: /* IMUL (signed widening) */
            delta++;
            src = newTemp(ty);
            assign(src, getIReg(sz,eregOfRM(modrm)));
            codegen_mulL_A_D ( sz, True, src, nameIReg(sz,eregOfRM(modrm)) );
            break;
         case 6: /* DIV */
            delta++;
            assign( t1, getIReg(sz, eregOfRM(modrm)) );
            codegen_div ( sz, t1, False );
            DIP("div%c %s\n", nameISize(sz), nameIReg(sz, eregOfRM(modrm)));
            break;
         case 7: /* IDIV */
            delta++;
            assign( t1, getIReg(sz, eregOfRM(modrm)) );
            codegen_div ( sz, t1, True );
            DIP("idiv%c %s\n", nameISize(sz), nameIReg(sz, eregOfRM(modrm)));
            break;
         default:
            /* This can't happen - gregOfRM should return 0 .. 7 only */
            vpanic("Grp3(x86)");
      }
   } else {
      /* E refers to memory: fetch the operand once into t1, then
         dispatch as above. */
      addr = disAMode ( &len, sorb, delta, dis_buf );
      t1   = newTemp(ty);
      delta += len;
      assign(t1, loadLE(ty,mkexpr(addr)));
      switch (gregOfRM(modrm)) {
         case 0: { /* TEST */
            d32 = getUDisp(sz, delta); delta += sz;
            dst1 = newTemp(ty);
            assign(dst1, binop(mkSizedOp(ty,Iop_And8),
                               mkexpr(t1), mkU(ty,d32)));
            setFlags_DEP1( Iop_And8, dst1, ty );
            DIP("test%c $0x%x, %s\n", nameISize(sz), d32, dis_buf);
            break;
         }
         case 1: /* UNDEFINED */
           /* See comment above on R case */
           *decode_OK = False;
           break;
         case 2: /* NOT */
            dst1 = newTemp(ty);
            assign(dst1, unop(mkSizedOp(ty,Iop_Not8), mkexpr(t1)));
            /* A LOCK prefix makes the store a CAS which side-exits
               (and retries) on failure. */
            if (locked) {
               casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(dst1)/*new*/,
                      guest_EIP_curr_instr );
            } else {
               storeLE( mkexpr(addr), mkexpr(dst1) );
            }
            DIP("not%c %s\n", nameISize(sz), dis_buf);
            break;
         case 3: /* NEG */
            dst0 = newTemp(ty);
            src  = newTemp(ty);
            dst1 = newTemp(ty);
            assign(dst0, mkU(ty,0));
            assign(src,  mkexpr(t1));
            assign(dst1, binop(mkSizedOp(ty,Iop_Sub8),
                               mkexpr(dst0), mkexpr(src)));
            if (locked) {
               casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(dst1)/*new*/,
                      guest_EIP_curr_instr );
            } else {
               storeLE( mkexpr(addr), mkexpr(dst1) );
            }
            setFlags_DEP1_DEP2(Iop_Sub8, dst0, src, ty);
            DIP("neg%c %s\n", nameISize(sz), dis_buf);
            break;
         case 4: /* MUL */
            codegen_mulL_A_D ( sz, False, t1, dis_buf );
            break;
         case 5: /* IMUL */
            codegen_mulL_A_D ( sz, True, t1, dis_buf );
            break;
         case 6: /* DIV */
            codegen_div ( sz, t1, False );
            DIP("div%c %s\n", nameISize(sz), dis_buf);
            break;
         case 7: /* IDIV */
            codegen_div ( sz, t1, True );
            DIP("idiv%c %s\n", nameISize(sz), dis_buf);
            break;
         default:
            /* This can't happen - gregOfRM should return 0 .. 7 only */
            vpanic("Grp3(x86)");
      }
   }
   return delta;
}
2958
2959
/* Group 4 extended opcodes (0xFE): byte-sized INC/DEC of the E
   operand, selected by bits 5..3 of the modRM byte.  Decodes starting
   at the modRM byte at 'delta' and returns the updated delta.  Sets
   *decode_OK to False (rather than asserting) on undecodable forms,
   so the caller can fail the whole instruction cleanly. */
static
UInt dis_Grp4 ( UChar sorb, Bool locked, Int delta, Bool* decode_OK )
{
   Int   alen;
   UChar modrm;
   HChar dis_buf[50];
   IRType ty = Ity_I8;
   IRTemp t1 = newTemp(ty);
   IRTemp t2 = newTemp(ty);

   *decode_OK = True;

   modrm = getIByte(delta);

   if (locked && (gregOfRM(modrm) != 0 && gregOfRM(modrm) != 1)) {
      /* LOCK prefix only allowed with inc and dec subopcodes */
      *decode_OK = False;
      return delta;
   }

   if (epartIsReg(modrm)) {
      /* Register operand: read-modify-write with no memory access, so
         LOCK needs no special handling here. */
      assign(t1, getIReg(1, eregOfRM(modrm)));
      switch (gregOfRM(modrm)) {
         case 0: /* INC */
            assign(t2, binop(Iop_Add8, mkexpr(t1), mkU8(1)));
            putIReg(1, eregOfRM(modrm), mkexpr(t2));
            setFlags_INC_DEC( True, t2, ty );
            break;
         case 1: /* DEC */
            assign(t2, binop(Iop_Sub8, mkexpr(t1), mkU8(1)));
            putIReg(1, eregOfRM(modrm), mkexpr(t2));
            setFlags_INC_DEC( False, t2, ty );
            break;
         default:
            *decode_OK = False;
            return delta;
      }
      delta++;
      DIP("%sb %s\n", nameGrp4(gregOfRM(modrm)),
                      nameIReg(1, eregOfRM(modrm)));
   } else {
      /* Memory operand.  When LOCKed, express the update as a
         compare-and-swap of the new value against the value
         originally loaded into t1, so contention is detected. */
      IRTemp addr = disAMode ( &alen, sorb, delta, dis_buf );
      assign( t1, loadLE(ty, mkexpr(addr)) );
      switch (gregOfRM(modrm)) {
         case 0: /* INC */
            assign(t2, binop(Iop_Add8, mkexpr(t1), mkU8(1)));
            if (locked) {
               casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(t2)/*new*/,
                      guest_EIP_curr_instr );
            } else {
               storeLE( mkexpr(addr), mkexpr(t2) );
            }
            setFlags_INC_DEC( True, t2, ty );
            break;
         case 1: /* DEC */
            assign(t2, binop(Iop_Sub8, mkexpr(t1), mkU8(1)));
            if (locked) {
               casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(t2)/*new*/,
                      guest_EIP_curr_instr );
            } else {
               storeLE( mkexpr(addr), mkexpr(t2) );
            }
            setFlags_INC_DEC( False, t2, ty );
            break;
         default:
            *decode_OK = False;
            return delta;
      }
      delta += alen;
      DIP("%sb %s\n", nameGrp4(gregOfRM(modrm)), dis_buf);
   }
   return delta;
}
3034
3035
/* Group 5 extended opcodes (0xFF): INC, DEC, CALL Ev, JMP Ev and
   PUSH Ev, selected by bits 5..3 of the modRM byte.  Subopcodes 3, 5
   and 7 are not handled here and set *decode_OK to False.  CALL and
   JMP end the translation: jmp_treg leaves dres->whatNext ==
   Dis_StopHere.  Returns the updated delta. */
static
UInt dis_Grp5 ( UChar sorb, Bool locked, Int sz, Int delta,
                /*MOD*/DisResult* dres, /*OUT*/Bool* decode_OK )
{
   Int     len;
   UChar   modrm;
   HChar   dis_buf[50];
   IRTemp  addr = IRTemp_INVALID;
   IRType  ty = szToITy(sz);
   IRTemp  t1 = newTemp(ty);
   IRTemp  t2 = IRTemp_INVALID;

   *decode_OK = True;

   modrm = getIByte(delta);

   if (locked && (gregOfRM(modrm) != 0 && gregOfRM(modrm) != 1)) {
      /* LOCK prefix only allowed with inc and dec subopcodes */
      *decode_OK = False;
      return delta;
   }

   if (epartIsReg(modrm)) {
      assign(t1, getIReg(sz,eregOfRM(modrm)));
      switch (gregOfRM(modrm)) {
         case 0: /* INC */
            vassert(sz == 2 || sz == 4);
            t2 = newTemp(ty);
            assign(t2, binop(mkSizedOp(ty,Iop_Add8),
                             mkexpr(t1), mkU(ty,1)));
            setFlags_INC_DEC( True, t2, ty );
            putIReg(sz,eregOfRM(modrm),mkexpr(t2));
            break;
         case 1: /* DEC */
            vassert(sz == 2 || sz == 4);
            t2 = newTemp(ty);
            assign(t2, binop(mkSizedOp(ty,Iop_Sub8),
                             mkexpr(t1), mkU(ty,1)));
            setFlags_INC_DEC( False, t2, ty );
            putIReg(sz,eregOfRM(modrm),mkexpr(t2));
            break;
         case 2: /* call Ev */
            vassert(sz == 4);
            /* Push the return address: the insn is 2 bytes (opcode +
               modrm), so the next insn is at delta+1 relative to the
               modrm byte. */
            t2 = newTemp(Ity_I32);
            assign(t2, binop(Iop_Sub32, getIReg(4,R_ESP), mkU32(4)));
            putIReg(4, R_ESP, mkexpr(t2));
            storeLE( mkexpr(t2), mkU32(guest_EIP_bbstart+delta+1));
            jmp_treg(dres, Ijk_Call, t1);
            vassert(dres->whatNext == Dis_StopHere);
            break;
         case 4: /* jmp Ev */
            vassert(sz == 4);
            jmp_treg(dres, Ijk_Boring, t1);
            vassert(dres->whatNext == Dis_StopHere);
            break;
         case 6: /* PUSH Ev */
            vassert(sz == 4 || sz == 2);
            t2 = newTemp(Ity_I32);
            assign( t2, binop(Iop_Sub32,getIReg(4,R_ESP),mkU32(sz)) );
            putIReg(4, R_ESP, mkexpr(t2) );
            storeLE( mkexpr(t2), mkexpr(t1) );
            break;
         default:
            *decode_OK = False;
            return delta;
      }
      delta++;
      DIP("%s%c %s\n", nameGrp5(gregOfRM(modrm)),
                       nameISize(sz), nameIReg(sz, eregOfRM(modrm)));
   } else {
      addr = disAMode ( &len, sorb, delta, dis_buf );
      assign(t1, loadLE(ty,mkexpr(addr)));
      switch (gregOfRM(modrm)) {
         case 0: /* INC */
            t2 = newTemp(ty);
            assign(t2, binop(mkSizedOp(ty,Iop_Add8),
                             mkexpr(t1), mkU(ty,1)));
            /* LOCKed form: compare-and-swap against the originally
               loaded value, so contention is detected. */
            if (locked) {
               casLE( mkexpr(addr),
                      mkexpr(t1), mkexpr(t2), guest_EIP_curr_instr );
            } else {
               storeLE(mkexpr(addr),mkexpr(t2));
            }
            setFlags_INC_DEC( True, t2, ty );
            break;
         case 1: /* DEC */
            t2 = newTemp(ty);
            assign(t2, binop(mkSizedOp(ty,Iop_Sub8),
                             mkexpr(t1), mkU(ty,1)));
            if (locked) {
               casLE( mkexpr(addr),
                      mkexpr(t1), mkexpr(t2), guest_EIP_curr_instr );
            } else {
               storeLE(mkexpr(addr),mkexpr(t2));
            }
            setFlags_INC_DEC( False, t2, ty );
            break;
         case 2: /* call Ev */
            vassert(sz == 4);
            /* Return address: modrm byte + 'len' amode bytes. */
            t2 = newTemp(Ity_I32);
            assign(t2, binop(Iop_Sub32, getIReg(4,R_ESP), mkU32(4)));
            putIReg(4, R_ESP, mkexpr(t2));
            storeLE( mkexpr(t2), mkU32(guest_EIP_bbstart+delta+len));
            jmp_treg(dres, Ijk_Call, t1);
            vassert(dres->whatNext == Dis_StopHere);
            break;
         case 4: /* JMP Ev */
            vassert(sz == 4);
            jmp_treg(dres, Ijk_Boring, t1);
            vassert(dres->whatNext == Dis_StopHere);
            break;
         case 6: /* PUSH Ev */
            vassert(sz == 4 || sz == 2);
            t2 = newTemp(Ity_I32);
            assign( t2, binop(Iop_Sub32,getIReg(4,R_ESP),mkU32(sz)) );
            putIReg(4, R_ESP, mkexpr(t2) );
            storeLE( mkexpr(t2), mkexpr(t1) );
            break;
         default:
            *decode_OK = False;
            return delta;
      }
      delta += len;
      DIP("%s%c %s\n", nameGrp5(gregOfRM(modrm)),
                       nameISize(sz), dis_buf);
   }
   return delta;
}
3165
3166
3167 /*------------------------------------------------------------*/
3168 /*--- Disassembling string ops (including REP prefixes) ---*/
3169 /*------------------------------------------------------------*/
3170
3171 /* Code shared by all the string ops */
3172 static
dis_string_op_increment(Int sz,Int t_inc)3173 void dis_string_op_increment(Int sz, Int t_inc)
3174 {
3175 if (sz == 4 || sz == 2) {
3176 assign( t_inc,
3177 binop(Iop_Shl32, IRExpr_Get( OFFB_DFLAG, Ity_I32 ),
3178 mkU8(sz/2) ) );
3179 } else {
3180 assign( t_inc,
3181 IRExpr_Get( OFFB_DFLAG, Ity_I32 ) );
3182 }
3183 }
3184
3185 static
dis_string_op(void (* dis_OP)(Int,IRTemp),Int sz,const HChar * name,UChar sorb)3186 void dis_string_op( void (*dis_OP)( Int, IRTemp ),
3187 Int sz, const HChar* name, UChar sorb )
3188 {
3189 IRTemp t_inc = newTemp(Ity_I32);
3190 vassert(sorb == 0); /* hmm. so what was the point of passing it in? */
3191 dis_string_op_increment(sz, t_inc);
3192 dis_OP( sz, t_inc );
3193 DIP("%s%c\n", name, nameISize(sz));
3194 }
3195
3196 static
dis_MOVS(Int sz,IRTemp t_inc)3197 void dis_MOVS ( Int sz, IRTemp t_inc )
3198 {
3199 IRType ty = szToITy(sz);
3200 IRTemp td = newTemp(Ity_I32); /* EDI */
3201 IRTemp ts = newTemp(Ity_I32); /* ESI */
3202
3203 assign( td, getIReg(4, R_EDI) );
3204 assign( ts, getIReg(4, R_ESI) );
3205
3206 storeLE( mkexpr(td), loadLE(ty,mkexpr(ts)) );
3207
3208 putIReg( 4, R_EDI, binop(Iop_Add32, mkexpr(td), mkexpr(t_inc)) );
3209 putIReg( 4, R_ESI, binop(Iop_Add32, mkexpr(ts), mkexpr(t_inc)) );
3210 }
3211
3212 static
dis_LODS(Int sz,IRTemp t_inc)3213 void dis_LODS ( Int sz, IRTemp t_inc )
3214 {
3215 IRType ty = szToITy(sz);
3216 IRTemp ts = newTemp(Ity_I32); /* ESI */
3217
3218 assign( ts, getIReg(4, R_ESI) );
3219
3220 putIReg( sz, R_EAX, loadLE(ty, mkexpr(ts)) );
3221
3222 putIReg( 4, R_ESI, binop(Iop_Add32, mkexpr(ts), mkexpr(t_inc)) );
3223 }
3224
3225 static
dis_STOS(Int sz,IRTemp t_inc)3226 void dis_STOS ( Int sz, IRTemp t_inc )
3227 {
3228 IRType ty = szToITy(sz);
3229 IRTemp ta = newTemp(ty); /* EAX */
3230 IRTemp td = newTemp(Ity_I32); /* EDI */
3231
3232 assign( ta, getIReg(sz, R_EAX) );
3233 assign( td, getIReg(4, R_EDI) );
3234
3235 storeLE( mkexpr(td), mkexpr(ta) );
3236
3237 putIReg( 4, R_EDI, binop(Iop_Add32, mkexpr(td), mkexpr(t_inc)) );
3238 }
3239
3240 static
dis_CMPS(Int sz,IRTemp t_inc)3241 void dis_CMPS ( Int sz, IRTemp t_inc )
3242 {
3243 IRType ty = szToITy(sz);
3244 IRTemp tdv = newTemp(ty); /* (EDI) */
3245 IRTemp tsv = newTemp(ty); /* (ESI) */
3246 IRTemp td = newTemp(Ity_I32); /* EDI */
3247 IRTemp ts = newTemp(Ity_I32); /* ESI */
3248
3249 assign( td, getIReg(4, R_EDI) );
3250 assign( ts, getIReg(4, R_ESI) );
3251
3252 assign( tdv, loadLE(ty,mkexpr(td)) );
3253 assign( tsv, loadLE(ty,mkexpr(ts)) );
3254
3255 setFlags_DEP1_DEP2 ( Iop_Sub8, tsv, tdv, ty );
3256
3257 putIReg(4, R_EDI, binop(Iop_Add32, mkexpr(td), mkexpr(t_inc)) );
3258 putIReg(4, R_ESI, binop(Iop_Add32, mkexpr(ts), mkexpr(t_inc)) );
3259 }
3260
3261 static
dis_SCAS(Int sz,IRTemp t_inc)3262 void dis_SCAS ( Int sz, IRTemp t_inc )
3263 {
3264 IRType ty = szToITy(sz);
3265 IRTemp ta = newTemp(ty); /* EAX */
3266 IRTemp td = newTemp(Ity_I32); /* EDI */
3267 IRTemp tdv = newTemp(ty); /* (EDI) */
3268
3269 assign( ta, getIReg(sz, R_EAX) );
3270 assign( td, getIReg(4, R_EDI) );
3271
3272 assign( tdv, loadLE(ty,mkexpr(td)) );
3273 setFlags_DEP1_DEP2 ( Iop_Sub8, ta, tdv, ty );
3274
3275 putIReg(4, R_EDI, binop(Iop_Add32, mkexpr(td), mkexpr(t_inc)) );
3276 }
3277
3278
/* Wrap the appropriate string op inside a REP/REPE/REPNE.
   We assume the insn is the last one in the basic block, and so emit a jump
   to the next insn, rather than just falling through.

   Only one iteration of the string op is emitted per translation;
   repetition happens by jumping back to 'eip' (this insn) so the
   whole thing is re-executed until ECX or the condition says stop. */
static
void dis_REP_op ( /*MOD*/DisResult* dres,
                  X86Condcode cond,
                  void (*dis_OP)(Int, IRTemp),
                  Int sz, Addr32 eip, Addr32 eip_next, const HChar* name )
{
   IRTemp t_inc = newTemp(Ity_I32);
   IRTemp tc    = newTemp(Ity_I32);  /* ECX */

   assign( tc, getIReg(4,R_ECX) );

   /* If ECX == 0, skip straight to the next instruction without
      touching memory or flags. */
   stmt( IRStmt_Exit( binop(Iop_CmpEQ32,mkexpr(tc),mkU32(0)),
                      Ijk_Boring,
                      IRConst_U32(eip_next), OFFB_EIP ) );

   /* ECX--, then do one step of the underlying string op. */
   putIReg(4, R_ECX, binop(Iop_Sub32, mkexpr(tc), mkU32(1)) );

   dis_string_op_increment(sz, t_inc);
   dis_OP (sz, t_inc);

   if (cond == X86CondAlways) {
      /* Plain REP: unconditionally loop back to re-test ECX. */
      jmp_lit(dres, Ijk_Boring, eip);
      vassert(dres->whatNext == Dis_StopHere);
   } else {
      /* REPE/REPNE: loop back while 'cond' holds (evaluated on the
         flags the string op just set), else fall through. */
      stmt( IRStmt_Exit( mk_x86g_calculate_condition(cond),
                         Ijk_Boring,
                         IRConst_U32(eip), OFFB_EIP ) );
      jmp_lit(dres, Ijk_Boring, eip_next);
      vassert(dres->whatNext == Dis_StopHere);
   }
   DIP("%s%c\n", name, nameISize(sz));
}
3314
3315
3316 /*------------------------------------------------------------*/
3317 /*--- Arithmetic, etc. ---*/
3318 /*------------------------------------------------------------*/
3319
3320 /* IMUL E, G. Supplied eip points to the modR/M byte. */
3321 static
dis_mul_E_G(UChar sorb,Int size,Int delta0)3322 UInt dis_mul_E_G ( UChar sorb,
3323 Int size,
3324 Int delta0 )
3325 {
3326 Int alen;
3327 HChar dis_buf[50];
3328 UChar rm = getIByte(delta0);
3329 IRType ty = szToITy(size);
3330 IRTemp te = newTemp(ty);
3331 IRTemp tg = newTemp(ty);
3332 IRTemp resLo = newTemp(ty);
3333
3334 assign( tg, getIReg(size, gregOfRM(rm)) );
3335 if (epartIsReg(rm)) {
3336 assign( te, getIReg(size, eregOfRM(rm)) );
3337 } else {
3338 IRTemp addr = disAMode( &alen, sorb, delta0, dis_buf );
3339 assign( te, loadLE(ty,mkexpr(addr)) );
3340 }
3341
3342 setFlags_MUL ( ty, te, tg, X86G_CC_OP_SMULB );
3343
3344 assign( resLo, binop( mkSizedOp(ty, Iop_Mul8), mkexpr(te), mkexpr(tg) ) );
3345
3346 putIReg(size, gregOfRM(rm), mkexpr(resLo) );
3347
3348 if (epartIsReg(rm)) {
3349 DIP("imul%c %s, %s\n", nameISize(size),
3350 nameIReg(size,eregOfRM(rm)),
3351 nameIReg(size,gregOfRM(rm)));
3352 return 1+delta0;
3353 } else {
3354 DIP("imul%c %s, %s\n", nameISize(size),
3355 dis_buf, nameIReg(size,gregOfRM(rm)));
3356 return alen+delta0;
3357 }
3358 }
3359
3360
/* IMUL I * E -> G.  Supplied eip points to the modR/M byte.
   Three-operand IMUL: G := E * immediate, flags via setFlags_MUL.
   'litsize' is the byte width of the immediate as encoded (1 for the
   sign-extended-imm8 form, else the operand size).  Returns the
   updated delta. */
static
UInt dis_imul_I_E_G ( UChar sorb,
                      Int size,
                      Int delta,
                      Int litsize )
{
   Int    d32, alen;
   HChar  dis_buf[50];
   UChar  rm = getIByte(delta);
   IRType ty = szToITy(size);
   IRTemp te = newTemp(ty);
   IRTemp tl = newTemp(ty);
   IRTemp resLo = newTemp(ty);

   vassert(size == 1 || size == 2 || size == 4);

   /* Fetch the E operand (register or memory), advancing delta past
      the modrm/amode bytes. */
   if (epartIsReg(rm)) {
      assign(te, getIReg(size, eregOfRM(rm)));
      delta++;
   } else {
      IRTemp addr = disAMode( &alen, sorb, delta, dis_buf );
      assign(te, loadLE(ty, mkexpr(addr)));
      delta += alen;
   }
   /* The immediate follows the modrm/amode bytes; it is read
      sign-extended, then truncated below to the operand size. */
   d32 = getSDisp(litsize,delta);
   delta += litsize;

   if (size == 1) d32 &= 0xFF;
   if (size == 2) d32 &= 0xFFFF;

   assign(tl, mkU(ty,d32));

   assign( resLo, binop( mkSizedOp(ty, Iop_Mul8), mkexpr(te), mkexpr(tl) ));

   setFlags_MUL ( ty, te, tl, X86G_CC_OP_SMULB );

   putIReg(size, gregOfRM(rm), mkexpr(resLo));

   DIP("imul %d, %s, %s\n", d32,
       ( epartIsReg(rm) ? nameIReg(size,eregOfRM(rm)) : dis_buf ),
       nameIReg(size,gregOfRM(rm)) );
   return delta;
}
3405
3406
3407 /* Generate an IR sequence to do a count-leading-zeroes operation on
3408 the supplied IRTemp, and return a new IRTemp holding the result.
3409 'ty' may be Ity_I16 or Ity_I32 only. In the case where the
3410 argument is zero, return the number of bits in the word (the
3411 natural semantics). */
gen_LZCNT(IRType ty,IRTemp src)3412 static IRTemp gen_LZCNT ( IRType ty, IRTemp src )
3413 {
3414 vassert(ty == Ity_I32 || ty == Ity_I16);
3415
3416 IRTemp src32 = newTemp(Ity_I32);
3417 assign(src32, widenUto32( mkexpr(src) ));
3418
3419 IRTemp src32x = newTemp(Ity_I32);
3420 assign(src32x,
3421 binop(Iop_Shl32, mkexpr(src32),
3422 mkU8(32 - 8 * sizeofIRType(ty))));
3423
3424 // Clz32 has undefined semantics when its input is zero, so
3425 // special-case around that.
3426 IRTemp res32 = newTemp(Ity_I32);
3427 assign(res32,
3428 IRExpr_ITE(
3429 binop(Iop_CmpEQ32, mkexpr(src32x), mkU32(0)),
3430 mkU32(8 * sizeofIRType(ty)),
3431 unop(Iop_Clz32, mkexpr(src32x))
3432 ));
3433
3434 IRTemp res = newTemp(ty);
3435 assign(res, narrowTo(ty, mkexpr(res32)));
3436 return res;
3437 }
3438
3439
3440 /*------------------------------------------------------------*/
3441 /*--- ---*/
3442 /*--- x87 FLOATING POINT INSTRUCTIONS ---*/
3443 /*--- ---*/
3444 /*------------------------------------------------------------*/
3445
3446 /* --- Helper functions for dealing with the register stack. --- */
3447
3448 /* --- Set the emulation-warning pseudo-register. --- */
3449
put_emwarn(IRExpr * e)3450 static void put_emwarn ( IRExpr* e /* :: Ity_I32 */ )
3451 {
3452 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
3453 stmt( IRStmt_Put( OFFB_EMNOTE, e ) );
3454 }
3455
3456 /* --- Produce an IRExpr* denoting a 64-bit QNaN. --- */
3457
mkQNaN64(void)3458 static IRExpr* mkQNaN64 ( void )
3459 {
3460 /* QNaN is 0 2047 1 0(51times)
3461 == 0b 11111111111b 1 0(51times)
3462 == 0x7FF8 0000 0000 0000
3463 */
3464 return IRExpr_Const(IRConst_F64i(0x7FF8000000000000ULL));
3465 }
3466
3467 /* --------- Get/put the top-of-stack pointer. --------- */
3468
get_ftop(void)3469 static IRExpr* get_ftop ( void )
3470 {
3471 return IRExpr_Get( OFFB_FTOP, Ity_I32 );
3472 }
3473
put_ftop(IRExpr * e)3474 static void put_ftop ( IRExpr* e )
3475 {
3476 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
3477 stmt( IRStmt_Put( OFFB_FTOP, e ) );
3478 }
3479
3480 /* --------- Get/put the C3210 bits. --------- */
3481
get_C3210(void)3482 static IRExpr* get_C3210 ( void )
3483 {
3484 return IRExpr_Get( OFFB_FC3210, Ity_I32 );
3485 }
3486
put_C3210(IRExpr * e)3487 static void put_C3210 ( IRExpr* e )
3488 {
3489 stmt( IRStmt_Put( OFFB_FC3210, e ) );
3490 }
3491
3492 /* --------- Get/put the FPU rounding mode. --------- */
get_fpround(void)3493 static IRExpr* /* :: Ity_I32 */ get_fpround ( void )
3494 {
3495 return IRExpr_Get( OFFB_FPROUND, Ity_I32 );
3496 }
3497
put_fpround(IRExpr * e)3498 static void put_fpround ( IRExpr* /* :: Ity_I32 */ e )
3499 {
3500 stmt( IRStmt_Put( OFFB_FPROUND, e ) );
3501 }
3502
3503
3504 /* --------- Synthesise a 2-bit FPU rounding mode. --------- */
3505 /* Produces a value in 0 .. 3, which is encoded as per the type
3506 IRRoundingMode. Since the guest_FPROUND value is also encoded as
3507 per IRRoundingMode, we merely need to get it and mask it for
3508 safety.
3509 */
get_roundingmode(void)3510 static IRExpr* /* :: Ity_I32 */ get_roundingmode ( void )
3511 {
3512 return binop( Iop_And32, get_fpround(), mkU32(3) );
3513 }
3514
get_FAKE_roundingmode(void)3515 static IRExpr* /* :: Ity_I32 */ get_FAKE_roundingmode ( void )
3516 {
3517 return mkU32(Irrm_NEAREST);
3518 }
3519
3520
3521 /* --------- Get/set FP register tag bytes. --------- */
3522
3523 /* Given i, and some expression e, generate 'ST_TAG(i) = e'. */
3524
put_ST_TAG(Int i,IRExpr * value)3525 static void put_ST_TAG ( Int i, IRExpr* value )
3526 {
3527 IRRegArray* descr;
3528 vassert(typeOfIRExpr(irsb->tyenv, value) == Ity_I8);
3529 descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 );
3530 stmt( IRStmt_PutI( mkIRPutI(descr, get_ftop(), i, value) ) );
3531 }
3532
3533 /* Given i, generate an expression yielding 'ST_TAG(i)'. This will be
3534 zero to indicate "Empty" and nonzero to indicate "NonEmpty". */
3535
get_ST_TAG(Int i)3536 static IRExpr* get_ST_TAG ( Int i )
3537 {
3538 IRRegArray* descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 );
3539 return IRExpr_GetI( descr, get_ftop(), i );
3540 }
3541
3542
3543 /* --------- Get/set FP registers. --------- */
3544
3545 /* Given i, and some expression e, emit 'ST(i) = e' and set the
3546 register's tag to indicate the register is full. The previous
3547 state of the register is not checked. */
3548
put_ST_UNCHECKED(Int i,IRExpr * value)3549 static void put_ST_UNCHECKED ( Int i, IRExpr* value )
3550 {
3551 IRRegArray* descr;
3552 vassert(typeOfIRExpr(irsb->tyenv, value) == Ity_F64);
3553 descr = mkIRRegArray( OFFB_FPREGS, Ity_F64, 8 );
3554 stmt( IRStmt_PutI( mkIRPutI(descr, get_ftop(), i, value) ) );
3555 /* Mark the register as in-use. */
3556 put_ST_TAG(i, mkU8(1));
3557 }
3558
3559 /* Given i, and some expression e, emit
3560 ST(i) = is_full(i) ? NaN : e
3561 and set the tag accordingly.
3562 */
3563
put_ST(Int i,IRExpr * value)3564 static void put_ST ( Int i, IRExpr* value )
3565 {
3566 put_ST_UNCHECKED(
3567 i,
3568 IRExpr_ITE( binop(Iop_CmpNE8, get_ST_TAG(i), mkU8(0)),
3569 /* non-0 means full */
3570 mkQNaN64(),
3571 /* 0 means empty */
3572 value
3573 )
3574 );
3575 }
3576
3577
3578 /* Given i, generate an expression yielding 'ST(i)'. */
3579
get_ST_UNCHECKED(Int i)3580 static IRExpr* get_ST_UNCHECKED ( Int i )
3581 {
3582 IRRegArray* descr = mkIRRegArray( OFFB_FPREGS, Ity_F64, 8 );
3583 return IRExpr_GetI( descr, get_ftop(), i );
3584 }
3585
3586
3587 /* Given i, generate an expression yielding
3588 is_full(i) ? ST(i) : NaN
3589 */
3590
get_ST(Int i)3591 static IRExpr* get_ST ( Int i )
3592 {
3593 return
3594 IRExpr_ITE( binop(Iop_CmpNE8, get_ST_TAG(i), mkU8(0)),
3595 /* non-0 means full */
3596 get_ST_UNCHECKED(i),
3597 /* 0 means empty */
3598 mkQNaN64());
3599 }
3600
3601
3602 /* Given i, and some expression e, and a condition cond, generate IR
3603 which has the same effect as put_ST(i,e) when cond is true and has
3604 no effect when cond is false. Given the lack of proper
3605 if-then-else in the IR, this is pretty tricky.
3606 */
3607
maybe_put_ST(IRTemp cond,Int i,IRExpr * value)3608 static void maybe_put_ST ( IRTemp cond, Int i, IRExpr* value )
3609 {
3610 // new_tag = if cond then FULL else old_tag
3611 // new_val = if cond then (if old_tag==FULL then NaN else val)
3612 // else old_val
3613
3614 IRTemp old_tag = newTemp(Ity_I8);
3615 assign(old_tag, get_ST_TAG(i));
3616 IRTemp new_tag = newTemp(Ity_I8);
3617 assign(new_tag,
3618 IRExpr_ITE(mkexpr(cond), mkU8(1)/*FULL*/, mkexpr(old_tag)));
3619
3620 IRTemp old_val = newTemp(Ity_F64);
3621 assign(old_val, get_ST_UNCHECKED(i));
3622 IRTemp new_val = newTemp(Ity_F64);
3623 assign(new_val,
3624 IRExpr_ITE(mkexpr(cond),
3625 IRExpr_ITE(binop(Iop_CmpNE8, mkexpr(old_tag), mkU8(0)),
3626 /* non-0 means full */
3627 mkQNaN64(),
3628 /* 0 means empty */
3629 value),
3630 mkexpr(old_val)));
3631
3632 put_ST_UNCHECKED(i, mkexpr(new_val));
3633 // put_ST_UNCHECKED incorrectly sets tag(i) to always be FULL. So
3634 // now set it to new_tag instead.
3635 put_ST_TAG(i, mkexpr(new_tag));
3636 }
3637
3638 /* Adjust FTOP downwards by one register. */
3639
fp_push(void)3640 static void fp_push ( void )
3641 {
3642 put_ftop( binop(Iop_Sub32, get_ftop(), mkU32(1)) );
3643 }
3644
3645 /* Adjust FTOP downwards by one register when COND is 1:I1. Else
3646 don't change it. */
3647
maybe_fp_push(IRTemp cond)3648 static void maybe_fp_push ( IRTemp cond )
3649 {
3650 put_ftop( binop(Iop_Sub32, get_ftop(), unop(Iop_1Uto32,mkexpr(cond))) );
3651 }
3652
3653 /* Adjust FTOP upwards by one register, and mark the vacated register
3654 as empty. */
3655
fp_pop(void)3656 static void fp_pop ( void )
3657 {
3658 put_ST_TAG(0, mkU8(0));
3659 put_ftop( binop(Iop_Add32, get_ftop(), mkU32(1)) );
3660 }
3661
3662 /* Set the C2 bit of the FPU status register to e[0]. Assumes that
3663 e[31:1] == 0.
3664 */
set_C2(IRExpr * e)3665 static void set_C2 ( IRExpr* e )
3666 {
3667 IRExpr* cleared = binop(Iop_And32, get_C3210(), mkU32(~X86G_FC_MASK_C2));
3668 put_C3210( binop(Iop_Or32,
3669 cleared,
3670 binop(Iop_Shl32, e, mkU8(X86G_FC_SHIFT_C2))) );
3671 }
3672
3673 /* Generate code to check that abs(d64) < 2^63 and is finite. This is
3674 used to do the range checks for FSIN, FCOS, FSINCOS and FPTAN. The
3675 test is simple, but the derivation of it is not so simple.
3676
3677 The exponent field for an IEEE754 double is 11 bits. That means it
3678 can take values 0 through 0x7FF. If the exponent has value 0x7FF,
3679 the number is either a NaN or an Infinity and so is not finite.
3680 Furthermore, a finite value of exactly 2^63 is the smallest value
3681 that has exponent value 0x43E. Hence, what we need to do is
3682 extract the exponent, ignoring the sign bit and mantissa, and check
3683 it is < 0x43E, or <= 0x43D.
3684
3685 To make this easily applicable to 32- and 64-bit targets, a
3686 roundabout approach is used. First the number is converted to I64,
3687 then the top 32 bits are taken. Shifting them right by 20 bits
3688 places the sign bit and exponent in the bottom 12 bits. Anding
3689 with 0x7FF gets rid of the sign bit, leaving just the exponent
3690 available for comparison.
3691 */
math_IS_TRIG_ARG_FINITE_AND_IN_RANGE(IRTemp d64)3692 static IRTemp math_IS_TRIG_ARG_FINITE_AND_IN_RANGE ( IRTemp d64 )
3693 {
3694 IRTemp i64 = newTemp(Ity_I64);
3695 assign(i64, unop(Iop_ReinterpF64asI64, mkexpr(d64)) );
3696 IRTemp exponent = newTemp(Ity_I32);
3697 assign(exponent,
3698 binop(Iop_And32,
3699 binop(Iop_Shr32, unop(Iop_64HIto32, mkexpr(i64)), mkU8(20)),
3700 mkU32(0x7FF)));
3701 IRTemp in_range_and_finite = newTemp(Ity_I1);
3702 assign(in_range_and_finite,
3703 binop(Iop_CmpLE32U, mkexpr(exponent), mkU32(0x43D)));
3704 return in_range_and_finite;
3705 }
3706
3707 /* Invent a plausible-looking FPU status word value:
3708 ((ftop & 7) << 11) | (c3210 & 0x4700)
3709 */
get_FPU_sw(void)3710 static IRExpr* get_FPU_sw ( void )
3711 {
3712 return
3713 unop(Iop_32to16,
3714 binop(Iop_Or32,
3715 binop(Iop_Shl32,
3716 binop(Iop_And32, get_ftop(), mkU32(7)),
3717 mkU8(11)),
3718 binop(Iop_And32, get_C3210(), mkU32(0x4700))
3719 ));
3720 }
3721
3722
3723 /* ------------------------------------------------------- */
3724 /* Given all that stack-mangling junk, we can now go ahead
3725 and describe FP instructions.
3726 */
3727
3728 /* ST(0) = ST(0) `op` mem64/32(addr)
3729 Need to check ST(0)'s tag on read, but not on write.
3730 */
3731 static
fp_do_op_mem_ST_0(IRTemp addr,const HChar * op_txt,HChar * dis_buf,IROp op,Bool dbl)3732 void fp_do_op_mem_ST_0 ( IRTemp addr, const HChar* op_txt, HChar* dis_buf,
3733 IROp op, Bool dbl )
3734 {
3735 DIP("f%s%c %s\n", op_txt, dbl?'l':'s', dis_buf);
3736 if (dbl) {
3737 put_ST_UNCHECKED(0,
3738 triop( op,
3739 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
3740 get_ST(0),
3741 loadLE(Ity_F64,mkexpr(addr))
3742 ));
3743 } else {
3744 put_ST_UNCHECKED(0,
3745 triop( op,
3746 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
3747 get_ST(0),
3748 unop(Iop_F32toF64, loadLE(Ity_F32,mkexpr(addr)))
3749 ));
3750 }
3751 }
3752
3753
3754 /* ST(0) = mem64/32(addr) `op` ST(0)
3755 Need to check ST(0)'s tag on read, but not on write.
3756 */
3757 static
fp_do_oprev_mem_ST_0(IRTemp addr,const HChar * op_txt,HChar * dis_buf,IROp op,Bool dbl)3758 void fp_do_oprev_mem_ST_0 ( IRTemp addr, const HChar* op_txt, HChar* dis_buf,
3759 IROp op, Bool dbl )
3760 {
3761 DIP("f%s%c %s\n", op_txt, dbl?'l':'s', dis_buf);
3762 if (dbl) {
3763 put_ST_UNCHECKED(0,
3764 triop( op,
3765 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
3766 loadLE(Ity_F64,mkexpr(addr)),
3767 get_ST(0)
3768 ));
3769 } else {
3770 put_ST_UNCHECKED(0,
3771 triop( op,
3772 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
3773 unop(Iop_F32toF64, loadLE(Ity_F32,mkexpr(addr))),
3774 get_ST(0)
3775 ));
3776 }
3777 }
3778
3779
3780 /* ST(dst) = ST(dst) `op` ST(src).
3781 Check dst and src tags when reading but not on write.
3782 */
3783 static
fp_do_op_ST_ST(const HChar * op_txt,IROp op,UInt st_src,UInt st_dst,Bool pop_after)3784 void fp_do_op_ST_ST ( const HChar* op_txt, IROp op, UInt st_src, UInt st_dst,
3785 Bool pop_after )
3786 {
3787 DIP("f%s%s st(%d), st(%d)\n", op_txt, pop_after?"p":"",
3788 (Int)st_src, (Int)st_dst );
3789 put_ST_UNCHECKED(
3790 st_dst,
3791 triop( op,
3792 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
3793 get_ST(st_dst),
3794 get_ST(st_src) )
3795 );
3796 if (pop_after)
3797 fp_pop();
3798 }
3799
3800 /* ST(dst) = ST(src) `op` ST(dst).
3801 Check dst and src tags when reading but not on write.
3802 */
3803 static
fp_do_oprev_ST_ST(const HChar * op_txt,IROp op,UInt st_src,UInt st_dst,Bool pop_after)3804 void fp_do_oprev_ST_ST ( const HChar* op_txt, IROp op, UInt st_src,
3805 UInt st_dst, Bool pop_after )
3806 {
3807 DIP("f%s%s st(%d), st(%d)\n", op_txt, pop_after?"p":"",
3808 (Int)st_src, (Int)st_dst );
3809 put_ST_UNCHECKED(
3810 st_dst,
3811 triop( op,
3812 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
3813 get_ST(st_src),
3814 get_ST(st_dst) )
3815 );
3816 if (pop_after)
3817 fp_pop();
3818 }
3819
3820 /* %eflags(Z,P,C) = UCOMI( st(0), st(i) ) */
fp_do_ucomi_ST0_STi(UInt i,Bool pop_after)3821 static void fp_do_ucomi_ST0_STi ( UInt i, Bool pop_after )
3822 {
3823 DIP("fucomi%s %%st(0),%%st(%d)\n", pop_after ? "p" : "", (Int)i );
3824 /* This is a bit of a hack (and isn't really right). It sets
3825 Z,P,C,O correctly, but forces A and S to zero, whereas the Intel
3826 documentation implies A and S are unchanged.
3827 */
3828 /* It's also fishy in that it is used both for COMIP and
3829 UCOMIP, and they aren't the same (although similar). */
3830 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(X86G_CC_OP_COPY) ));
3831 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) ));
3832 stmt( IRStmt_Put( OFFB_CC_DEP1,
3833 binop( Iop_And32,
3834 binop(Iop_CmpF64, get_ST(0), get_ST(i)),
3835 mkU32(0x45)
3836 )));
3837 /* Set NDEP even though it isn't used. This makes redundant-PUT
3838 elimination of previous stores to this field work better. */
3839 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
3840 if (pop_after)
3841 fp_pop();
3842 }
3843
3844
3845 static
dis_FPU(Bool * decode_ok,UChar sorb,Int delta)3846 UInt dis_FPU ( Bool* decode_ok, UChar sorb, Int delta )
3847 {
3848 Int len;
3849 UInt r_src, r_dst;
3850 HChar dis_buf[50];
3851 IRTemp t1, t2;
3852
3853 /* On entry, delta points at the second byte of the insn (the modrm
3854 byte).*/
3855 UChar first_opcode = getIByte(delta-1);
3856 UChar modrm = getIByte(delta+0);
3857
3858 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xD8 opcodes +-+-+-+-+-+-+-+ */
3859
3860 if (first_opcode == 0xD8) {
3861 if (modrm < 0xC0) {
3862
3863 /* bits 5,4,3 are an opcode extension, and the modRM also
3864 specifies an address. */
3865 IRTemp addr = disAMode( &len, sorb, delta, dis_buf );
3866 delta += len;
3867
3868 switch (gregOfRM(modrm)) {
3869
            /* 0xD8 /0../7 with a memory operand: binary FP op between
               ST(0) and a single-real (m32) source, result to ST(0).
               The m32 value is widened to F64 first; all x87 arithmetic
               here is done at 64-bit precision (see file header). */
            case 0: /* FADD single-real */
               fp_do_op_mem_ST_0 ( addr, "add", dis_buf, Iop_AddF64, False );
               break;

            case 1: /* FMUL single-real */
               fp_do_op_mem_ST_0 ( addr, "mul", dis_buf, Iop_MulF64, False );
               break;

            case 2: /* FCOM single-real */
               DIP("fcoms %s\n", dis_buf);
               /* This forces C1 to zero, which isn't right. */
               /* Iop_CmpF64 yields an IRCmpF64Result; shifting it left
                  by 8 and masking with 0x4500 lands the comparison
                  outcome in the C3 (bit 14), C2 (bit 10) and C0 (bit 8)
                  condition-code positions, as FCOM requires. */
               put_C3210(
                   binop( Iop_And32,
                          binop(Iop_Shl32,
                                binop(Iop_CmpF64,
                                      get_ST(0),
                                      unop(Iop_F32toF64,
                                           loadLE(Ity_F32,mkexpr(addr)))),
                                mkU8(8)),
                          mkU32(0x4500)
                   ));
               break;

            case 3: /* FCOMP single-real */
               DIP("fcomps %s\n", dis_buf);
               /* This forces C1 to zero, which isn't right. */
               /* Same compare-and-mask scheme as FCOM above, plus a
                  stack pop afterwards. */
               put_C3210(
                   binop( Iop_And32,
                          binop(Iop_Shl32,
                                binop(Iop_CmpF64,
                                      get_ST(0),
                                      unop(Iop_F32toF64,
                                           loadLE(Ity_F32,mkexpr(addr)))),
                                mkU8(8)),
                          mkU32(0x4500)
                   ));
               fp_pop();
               break;

            case 4: /* FSUB single-real */
               fp_do_op_mem_ST_0 ( addr, "sub", dis_buf, Iop_SubF64, False );
               break;

            case 5: /* FSUBR single-real */
               /* "oprev" variant: operands reversed, ST(0) = mem - ST(0). */
               fp_do_oprev_mem_ST_0 ( addr, "subr", dis_buf, Iop_SubF64, False );
               break;

            case 6: /* FDIV single-real */
               fp_do_op_mem_ST_0 ( addr, "div", dis_buf, Iop_DivF64, False );
               break;

            case 7: /* FDIVR single-real */
               fp_do_oprev_mem_ST_0 ( addr, "divr", dis_buf, Iop_DivF64, False );
               break;

            default:
               vex_printf("unhandled opc_aux = 0x%2x\n", gregOfRM(modrm));
               vex_printf("first_opcode == 0xD8\n");
               goto decode_fail;
         }
      } else {
         /* 0xD8 register forms: op between ST(i) and ST(0), result to
            ST(0).  modrm itself selects both the operation and i. */
         delta++;
         switch (modrm) {

            case 0xC0 ... 0xC7: /* FADD %st(?),%st(0) */
               fp_do_op_ST_ST ( "add", Iop_AddF64, modrm - 0xC0, 0, False );
               break;

            case 0xC8 ... 0xCF: /* FMUL %st(?),%st(0) */
               fp_do_op_ST_ST ( "mul", Iop_MulF64, modrm - 0xC8, 0, False );
               break;

            /* Dunno if this is right */
            case 0xD0 ... 0xD7: /* FCOM %st(?),%st(0) */
               r_dst = (UInt)modrm - 0xD0;
               DIP("fcom %%st(0),%%st(%d)\n", (Int)r_dst);
               /* This forces C1 to zero, which isn't right. */
               put_C3210(
                   binop( Iop_And32,
                          binop(Iop_Shl32,
                                binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)),
                                mkU8(8)),
                          mkU32(0x4500)
                   ));
               break;

            /* Dunno if this is right */
            case 0xD8 ... 0xDF: /* FCOMP %st(?),%st(0) */
               r_dst = (UInt)modrm - 0xD8;
               DIP("fcomp %%st(0),%%st(%d)\n", (Int)r_dst);
               /* This forces C1 to zero, which isn't right. */
               put_C3210(
                   binop( Iop_And32,
                          binop(Iop_Shl32,
                                binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)),
                                mkU8(8)),
                          mkU32(0x4500)
                   ));
               fp_pop();
               break;

            case 0xE0 ... 0xE7: /* FSUB %st(?),%st(0) */
               fp_do_op_ST_ST ( "sub", Iop_SubF64, modrm - 0xE0, 0, False );
               break;

            case 0xE8 ... 0xEF: /* FSUBR %st(?),%st(0) */
               fp_do_oprev_ST_ST ( "subr", Iop_SubF64, modrm - 0xE8, 0, False );
               break;

            case 0xF0 ... 0xF7: /* FDIV %st(?),%st(0) */
               fp_do_op_ST_ST ( "div", Iop_DivF64, modrm - 0xF0, 0, False );
               break;

            case 0xF8 ... 0xFF: /* FDIVR %st(?),%st(0) */
               fp_do_oprev_ST_ST ( "divr", Iop_DivF64, modrm - 0xF8, 0, False );
               break;

            default:
               goto decode_fail;
         }
      }
   }
3992
   /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xD9 opcodes +-+-+-+-+-+-+-+ */
   else
   if (first_opcode == 0xD9) {
      if (modrm < 0xC0) {

         /* bits 5,4,3 are an opcode extension, and the modRM also
            specifies an address. */
         IRTemp addr = disAMode( &len, sorb, delta, dis_buf );
         delta += len;

         switch (gregOfRM(modrm)) {

            case 0: /* FLD single-real */
               /* Push m32 onto the stack, widened to F64. */
               DIP("flds %s\n", dis_buf);
               fp_push();
               put_ST(0, unop(Iop_F32toF64,
                              loadLE(Ity_F32, mkexpr(addr))));
               break;

            case 2: /* FST single-real */
               /* Narrow ST(0) to F32 using the current guest rounding
                  mode (one of the few places rounding is honoured --
                  see file header). */
               DIP("fsts %s\n", dis_buf);
               storeLE(mkexpr(addr),
                       binop(Iop_F64toF32, get_roundingmode(), get_ST(0)));
               break;

            case 3: /* FSTP single-real */
               /* As FST single-real, then pop the stack. */
               DIP("fstps %s\n", dis_buf);
               storeLE(mkexpr(addr),
                       binop(Iop_F64toF32, get_roundingmode(), get_ST(0)));
               fp_pop();
               break;

            case 4: { /* FLDENV m28 */
               /* Uses dirty helper:
                     VexEmNote x86g_do_FLDENV ( VexGuestX86State*, HWord ) */
               IRTemp ew = newTemp(Ity_I32);
               IRDirty* d = unsafeIRDirty_0_N (
                               0/*regparms*/,
                               "x86g_dirtyhelper_FLDENV",
                               &x86g_dirtyhelper_FLDENV,
                               mkIRExprVec_2( IRExpr_BBPTR(), mkexpr(addr) )
                            );
               d->tmp = ew;
               /* declare we're reading memory */
               d->mFx = Ifx_Read;
               d->mAddr = mkexpr(addr);
               d->mSize = 28;

               /* declare we're writing guest state */
               d->nFxState = 4;
               vex_bzero(&d->fxState, sizeof(d->fxState));

               d->fxState[0].fx = Ifx_Write;
               d->fxState[0].offset = OFFB_FTOP;
               d->fxState[0].size = sizeof(UInt);

               d->fxState[1].fx = Ifx_Write;
               d->fxState[1].offset = OFFB_FPTAGS;
               d->fxState[1].size = 8 * sizeof(UChar);

               d->fxState[2].fx = Ifx_Write;
               d->fxState[2].offset = OFFB_FPROUND;
               d->fxState[2].size = sizeof(UInt);

               d->fxState[3].fx = Ifx_Write;
               d->fxState[3].offset = OFFB_FC3210;
               d->fxState[3].size = sizeof(UInt);

               stmt( IRStmt_Dirty(d) );

               /* ew contains any emulation warning we may need to
                  issue.  If needed, side-exit to the next insn,
                  reporting the warning, so that Valgrind's dispatcher
                  sees the warning. */
               put_emwarn( mkexpr(ew) );
               stmt(
                  IRStmt_Exit(
                     binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)),
                     Ijk_EmWarn,
                     IRConst_U32( ((Addr32)guest_EIP_bbstart)+delta),
                     OFFB_EIP
                  )
               );

               DIP("fldenv %s\n", dis_buf);
               break;
            }

            case 5: {/* FLDCW */
               /* The only thing we observe in the control word is the
                  rounding mode.  Therefore, pass the 16-bit value
                  (x87 native-format control word) to a clean helper,
                  getting back a 64-bit value, the lower half of which
                  is the FPROUND value to store, and the upper half of
                  which is the emulation-warning token which may be
                  generated.
               */
               /* ULong x86h_check_fldcw ( UInt ); */
               IRTemp t64 = newTemp(Ity_I64);
               IRTemp ew = newTemp(Ity_I32);
               DIP("fldcw %s\n", dis_buf);
               assign( t64, mkIRExprCCall(
                               Ity_I64, 0/*regparms*/,
                               "x86g_check_fldcw",
                               &x86g_check_fldcw,
                               mkIRExprVec_1(
                                  unop( Iop_16Uto32,
                                        loadLE(Ity_I16, mkexpr(addr)))
                               )
                            )
                     );

               /* Low half -> new FPROUND; high half -> warning token. */
               put_fpround( unop(Iop_64to32, mkexpr(t64)) );
               assign( ew, unop(Iop_64HIto32, mkexpr(t64) ) );
               put_emwarn( mkexpr(ew) );
               /* Finally, if an emulation warning was reported,
                  side-exit to the next insn, reporting the warning,
                  so that Valgrind's dispatcher sees the warning. */
               stmt(
                  IRStmt_Exit(
                     binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)),
                     Ijk_EmWarn,
                     IRConst_U32( ((Addr32)guest_EIP_bbstart)+delta),
                     OFFB_EIP
                  )
               );
               break;
            }

            case 6: { /* FNSTENV m28 */
               /* Uses dirty helper:
                     void x86g_do_FSTENV ( VexGuestX86State*, HWord ) */
               IRDirty* d = unsafeIRDirty_0_N (
                               0/*regparms*/,
                               "x86g_dirtyhelper_FSTENV",
                               &x86g_dirtyhelper_FSTENV,
                               mkIRExprVec_2( IRExpr_BBPTR(), mkexpr(addr) )
                            );
               /* declare we're writing memory */
               d->mFx = Ifx_Write;
               d->mAddr = mkexpr(addr);
               d->mSize = 28;

               /* declare we're reading guest state: mirror image of
                  the FLDENV case above, with Read instead of Write. */
               d->nFxState = 4;
               vex_bzero(&d->fxState, sizeof(d->fxState));

               d->fxState[0].fx = Ifx_Read;
               d->fxState[0].offset = OFFB_FTOP;
               d->fxState[0].size = sizeof(UInt);

               d->fxState[1].fx = Ifx_Read;
               d->fxState[1].offset = OFFB_FPTAGS;
               d->fxState[1].size = 8 * sizeof(UChar);

               d->fxState[2].fx = Ifx_Read;
               d->fxState[2].offset = OFFB_FPROUND;
               d->fxState[2].size = sizeof(UInt);

               d->fxState[3].fx = Ifx_Read;
               d->fxState[3].offset = OFFB_FC3210;
               d->fxState[3].size = sizeof(UInt);

               stmt( IRStmt_Dirty(d) );

               DIP("fnstenv %s\n", dis_buf);
               break;
            }

            case 7: /* FNSTCW */
               /* Fake up a native x87 FPU control word.  The only
                  thing it depends on is FPROUND[1:0], so call a clean
                  helper to cook it up. */
               /* UInt x86h_create_fpucw ( UInt fpround ) */
               DIP("fnstcw %s\n", dis_buf);
               storeLE(
                  mkexpr(addr),
                  unop( Iop_32to16,
                        mkIRExprCCall(
                           Ity_I32, 0/*regp*/,
                           "x86g_create_fpucw", &x86g_create_fpucw,
                           mkIRExprVec_1( get_fpround() )
                        )
                  )
               );
               break;

            default:
               vex_printf("unhandled opc_aux = 0x%2x\n", gregOfRM(modrm));
               vex_printf("first_opcode == 0xD9\n");
               goto decode_fail;
         }

      } else {
         /* 0xD9 register forms: stack manipulation, sign/abs, tests,
            constant loads, and the transcendental group. */
         delta++;
         switch (modrm) {

            case 0xC0 ... 0xC7: /* FLD %st(?) */
               /* Read ST(i) into a temp before pushing, since the push
                  renumbers the stack slots. */
               r_src = (UInt)modrm - 0xC0;
               DIP("fld %%st(%d)\n", (Int)r_src);
               t1 = newTemp(Ity_F64);
               assign(t1, get_ST(r_src));
               fp_push();
               put_ST(0, mkexpr(t1));
               break;

            case 0xC8 ... 0xCF: /* FXCH %st(?) */
               /* Swap ST(0) and ST(i) via two temps. */
               r_src = (UInt)modrm - 0xC8;
               DIP("fxch %%st(%d)\n", (Int)r_src);
               t1 = newTemp(Ity_F64);
               t2 = newTemp(Ity_F64);
               assign(t1, get_ST(0));
               assign(t2, get_ST(r_src));
               put_ST_UNCHECKED(0, mkexpr(t2));
               put_ST_UNCHECKED(r_src, mkexpr(t1));
               break;

            case 0xE0: /* FCHS */
               DIP("fchs\n");
               put_ST_UNCHECKED(0, unop(Iop_NegF64, get_ST(0)));
               break;

            case 0xE1: /* FABS */
               DIP("fabs\n");
               put_ST_UNCHECKED(0, unop(Iop_AbsF64, get_ST(0)));
               break;

            case 0xE4: /* FTST */
               /* Compare ST(0) against +0.0 and fold the result into
                  C3/C2/C0, same scheme as FCOM. */
               DIP("ftst\n");
               /* This forces C1 to zero, which isn't right. */
               /* Well, in fact the Intel docs say (bizarrely): "C1 is
                  set to 0 if stack underflow occurred; otherwise, set
                  to 0" which is pretty nonsensical.  I guess it's a
                  typo. */
               put_C3210(
                   binop( Iop_And32,
                          binop(Iop_Shl32,
                                binop(Iop_CmpF64,
                                      get_ST(0),
                                      IRExpr_Const(IRConst_F64i(0x0ULL))),
                                mkU8(8)),
                          mkU32(0x4500)
                   ));
               break;

            case 0xE5: { /* FXAM */
               /* This is an interesting one.  It examines %st(0),
                  regardless of whether the tag says it's empty or not.
                  Here, just pass both the tag (in our format) and the
                  value (as a double, actually a ULong) to a helper
                  function. */
               IRExpr** args
                  = mkIRExprVec_2( unop(Iop_8Uto32, get_ST_TAG(0)),
                                   unop(Iop_ReinterpF64asI64,
                                        get_ST_UNCHECKED(0)) );
               put_C3210(mkIRExprCCall(
                            Ity_I32,
                            0/*regparm*/,
                            "x86g_calculate_FXAM", &x86g_calculate_FXAM,
                            args
                        ));
               DIP("fxam\n");
               break;
            }

            /* The FLD-constant group below pushes bit-exact IEEE-754
               double encodings; the commented-out IRConst_F64 forms
               show the intended decimal values. */
            case 0xE8: /* FLD1 */
               DIP("fld1\n");
               fp_push();
               /* put_ST(0, IRExpr_Const(IRConst_F64(1.0))); */
               put_ST(0, IRExpr_Const(IRConst_F64i(0x3ff0000000000000ULL)));
               break;

            case 0xE9: /* FLDL2T */
               DIP("fldl2t\n");
               fp_push();
               /* put_ST(0, IRExpr_Const(IRConst_F64(3.32192809488736234781))); */
               put_ST(0, IRExpr_Const(IRConst_F64i(0x400a934f0979a371ULL)));
               break;

            case 0xEA: /* FLDL2E */
               DIP("fldl2e\n");
               fp_push();
               /* put_ST(0, IRExpr_Const(IRConst_F64(1.44269504088896340739))); */
               put_ST(0, IRExpr_Const(IRConst_F64i(0x3ff71547652b82feULL)));
               break;

            case 0xEB: /* FLDPI */
               DIP("fldpi\n");
               fp_push();
               /* put_ST(0, IRExpr_Const(IRConst_F64(3.14159265358979323851))); */
               put_ST(0, IRExpr_Const(IRConst_F64i(0x400921fb54442d18ULL)));
               break;

            case 0xEC: /* FLDLG2 */
               DIP("fldlg2\n");
               fp_push();
               /* put_ST(0, IRExpr_Const(IRConst_F64(0.301029995663981143))); */
               put_ST(0, IRExpr_Const(IRConst_F64i(0x3fd34413509f79ffULL)));
               break;

            case 0xED: /* FLDLN2 */
               DIP("fldln2\n");
               fp_push();
               /* put_ST(0, IRExpr_Const(IRConst_F64(0.69314718055994530942))); */
               put_ST(0, IRExpr_Const(IRConst_F64i(0x3fe62e42fefa39efULL)));
               break;

            case 0xEE: /* FLDZ */
               DIP("fldz\n");
               fp_push();
               /* put_ST(0, IRExpr_Const(IRConst_F64(0.0))); */
               put_ST(0, IRExpr_Const(IRConst_F64i(0x0000000000000000ULL)));
               break;

            case 0xF0: /* F2XM1 */
               DIP("f2xm1\n");
               put_ST_UNCHECKED(0,
                  binop(Iop_2xm1F64,
                        get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
                        get_ST(0)));
               break;

            case 0xF1: /* FYL2X */
               /* Result goes to ST(1); the subsequent pop leaves it in
                  ST(0). */
               DIP("fyl2x\n");
               put_ST_UNCHECKED(1,
                  triop(Iop_Yl2xF64,
                        get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
                        get_ST(1),
                        get_ST(0)));
               fp_pop();
               break;

            case 0xF2: { /* FPTAN */
               DIP("fptan\n");
               IRTemp argD = newTemp(Ity_F64);
               assign(argD, get_ST(0));
               IRTemp argOK = math_IS_TRIG_ARG_FINITE_AND_IN_RANGE(argD);
               IRTemp resD = newTemp(Ity_F64);
               /* Out-of-range args leave ST(0) unchanged ... */
               assign(resD,
                      IRExpr_ITE(
                         mkexpr(argOK),
                         binop(Iop_TanF64,
                               get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
                               mkexpr(argD)),
                         mkexpr(argD))
               );
               put_ST_UNCHECKED(0, mkexpr(resD));
               /* Conditionally push 1.0 on the stack, if the arg is
                  in range */
               maybe_fp_push(argOK);
               maybe_put_ST(argOK, 0,
                            IRExpr_Const(IRConst_F64(1.0)));
               /* ... and C2=1 flags the out-of-range condition. */
               set_C2( binop(Iop_Xor32,
                             unop(Iop_1Uto32, mkexpr(argOK)),
                             mkU32(1)) );
               break;
            }

            case 0xF3: /* FPATAN */
               DIP("fpatan\n");
               put_ST_UNCHECKED(1,
                  triop(Iop_AtanF64,
                        get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
                        get_ST(1),
                        get_ST(0)));
               fp_pop();
               break;

            case 0xF4: { /* FXTRACT */
               /* Split ST(0) into exponent (replaces ST(0)) and
                  significand (pushed on top).  The same clean helper
                  computes both; its second argument selects which. */
               IRTemp argF = newTemp(Ity_F64);
               IRTemp sigF = newTemp(Ity_F64);
               IRTemp expF = newTemp(Ity_F64);
               IRTemp argI = newTemp(Ity_I64);
               IRTemp sigI = newTemp(Ity_I64);
               IRTemp expI = newTemp(Ity_I64);
               DIP("fxtract\n");
               assign( argF, get_ST(0) );
               assign( argI, unop(Iop_ReinterpF64asI64, mkexpr(argF)));
               assign( sigI,
                       mkIRExprCCall(
                          Ity_I64, 0/*regparms*/,
                          "x86amd64g_calculate_FXTRACT",
                          &x86amd64g_calculate_FXTRACT,
                          mkIRExprVec_2( mkexpr(argI),
                                         mkIRExpr_HWord(0)/*sig*/ ))
               );
               assign( expI,
                       mkIRExprCCall(
                          Ity_I64, 0/*regparms*/,
                          "x86amd64g_calculate_FXTRACT",
                          &x86amd64g_calculate_FXTRACT,
                          mkIRExprVec_2( mkexpr(argI),
                                         mkIRExpr_HWord(1)/*exp*/ ))
               );
               assign( sigF, unop(Iop_ReinterpI64asF64, mkexpr(sigI)) );
               assign( expF, unop(Iop_ReinterpI64asF64, mkexpr(expI)) );
               /* exponent */
               put_ST_UNCHECKED(0, mkexpr(expF) );
               fp_push();
               /* significand */
               put_ST(0, mkexpr(sigF) );
               break;
            }

            case 0xF5: { /* FPREM1 -- IEEE compliant */
               IRTemp a1 = newTemp(Ity_F64);
               IRTemp a2 = newTemp(Ity_F64);
               DIP("fprem1\n");
               /* Do FPREM1 twice, once to get the remainder, and once
                  to get the C3210 flag values. */
               assign( a1, get_ST(0) );
               assign( a2, get_ST(1) );
               put_ST_UNCHECKED(0,
                  triop(Iop_PRem1F64,
                        get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
                        mkexpr(a1),
                        mkexpr(a2)));
               put_C3210(
                  triop(Iop_PRem1C3210F64,
                        get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
                        mkexpr(a1),
                        mkexpr(a2)) );
               break;
            }
4417
4418 case 0xF7: /* FINCSTP */
4419 DIP("fprem\n");
4420 put_ftop( binop(Iop_Add32, get_ftop(), mkU32(1)) );
4421 break;
4422
            case 0xF8: { /* FPREM -- not IEEE compliant */
               IRTemp a1 = newTemp(Ity_F64);
               IRTemp a2 = newTemp(Ity_F64);
               DIP("fprem\n");
               /* Do FPREM twice, once to get the remainder, and once
                  to get the C3210 flag values. */
               assign( a1, get_ST(0) );
               assign( a2, get_ST(1) );
               put_ST_UNCHECKED(0,
                  triop(Iop_PRemF64,
                        get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
                        mkexpr(a1),
                        mkexpr(a2)));
               put_C3210(
                  triop(Iop_PRemC3210F64,
                        get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
                        mkexpr(a1),
                        mkexpr(a2)) );
               break;
            }

            case 0xF9: /* FYL2XP1 */
               /* Result goes to ST(1); the pop leaves it in ST(0). */
               DIP("fyl2xp1\n");
               put_ST_UNCHECKED(1,
                  triop(Iop_Yl2xp1F64,
                        get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
                        get_ST(1),
                        get_ST(0)));
               fp_pop();
               break;

            case 0xFA: /* FSQRT */
               DIP("fsqrt\n");
               put_ST_UNCHECKED(0,
                  binop(Iop_SqrtF64,
                        get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
                        get_ST(0)));
               break;

            case 0xFB: { /* FSINCOS */
               /* ST(0) := sin(ST(0)); then cos of the original arg is
                  pushed -- but only when the arg is finite and in
                  range.  Out of range: arg unchanged, nothing pushed,
                  C2 set. */
               DIP("fsincos\n");
               IRTemp argD = newTemp(Ity_F64);
               assign(argD, get_ST(0));
               IRTemp argOK = math_IS_TRIG_ARG_FINITE_AND_IN_RANGE(argD);
               IRTemp resD = newTemp(Ity_F64);
               assign(resD,
                      IRExpr_ITE(
                         mkexpr(argOK),
                         binop(Iop_SinF64,
                               get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
                               mkexpr(argD)),
                         mkexpr(argD))
               );
               put_ST_UNCHECKED(0, mkexpr(resD));
               /* Conditionally push the cos value on the stack, if
                  the arg is in range */
               maybe_fp_push(argOK);
               maybe_put_ST(argOK, 0,
                            binop(Iop_CosF64,
                                  get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
                                  mkexpr(argD)));
               set_C2( binop(Iop_Xor32,
                             unop(Iop_1Uto32, mkexpr(argOK)),
                             mkU32(1)) );
               break;
            }

            case 0xFC: /* FRNDINT */
               /* Round-to-integer honours the real guest rounding
                  mode (not the FAKE one). */
               DIP("frndint\n");
               put_ST_UNCHECKED(0,
                  binop(Iop_RoundF64toInt, get_roundingmode(), get_ST(0)) );
               break;

            case 0xFD: /* FSCALE */
               DIP("fscale\n");
               put_ST_UNCHECKED(0,
                  triop(Iop_ScaleF64,
                        get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
                        get_ST(0),
                        get_ST(1)));
               break;

            case 0xFE: /* FSIN */
            case 0xFF: { /* FCOS */
               /* Shared body: only the IR op differs.  Same
                  out-of-range handling as FSINCOS, minus the push. */
               Bool isSIN = modrm == 0xFE;
               DIP("%s\n", isSIN ? "fsin" : "fcos");
               IRTemp argD = newTemp(Ity_F64);
               assign(argD, get_ST(0));
               IRTemp argOK = math_IS_TRIG_ARG_FINITE_AND_IN_RANGE(argD);
               IRTemp resD = newTemp(Ity_F64);
               assign(resD,
                      IRExpr_ITE(
                         mkexpr(argOK),
                         binop(isSIN ? Iop_SinF64 : Iop_CosF64,
                               get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
                               mkexpr(argD)),
                         mkexpr(argD))
               );
               put_ST_UNCHECKED(0, mkexpr(resD));
               set_C2( binop(Iop_Xor32,
                             unop(Iop_1Uto32, mkexpr(argOK)),
                             mkU32(1)) );
               break;
            }

            default:
               goto decode_fail;
         }
      }
   }
4533
   /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDA opcodes +-+-+-+-+-+-+-+ */
   else
   if (first_opcode == 0xDA) {

      if (modrm < 0xC0) {

         /* bits 5,4,3 are an opcode extension, and the modRM also
            specifies an address. */
         /* Memory forms: ST(0) op m32int, with the integer widened to
            F64.  The arithmetic cases share the do_fop_m32 /
            do_foprev_m32 tails below via goto. */
         IROp fop;
         IRTemp addr = disAMode( &len, sorb, delta, dis_buf );
         delta += len;
         switch (gregOfRM(modrm)) {

            case 0: /* FIADD m32int */ /* ST(0) += m32int */
               DIP("fiaddl %s\n", dis_buf);
               fop = Iop_AddF64;
               goto do_fop_m32;

            case 1: /* FIMUL m32int */ /* ST(0) *= m32int */
               DIP("fimull %s\n", dis_buf);
               fop = Iop_MulF64;
               goto do_fop_m32;

            case 2: /* FICOM m32int */
               DIP("ficoml %s\n", dis_buf);
               /* This forces C1 to zero, which isn't right. */
               /* Same <<8 & 0x4500 compare-result-to-C3/C2/C0 mapping
                  as the FCOM cases under 0xD8. */
               put_C3210(
                   binop( Iop_And32,
                          binop(Iop_Shl32,
                                binop(Iop_CmpF64,
                                      get_ST(0),
                                      unop(Iop_I32StoF64,
                                           loadLE(Ity_I32,mkexpr(addr)))),
                                mkU8(8)),
                          mkU32(0x4500)
                   ));
               break;

            case 3: /* FICOMP m32int */
               DIP("ficompl %s\n", dis_buf);
               /* This forces C1 to zero, which isn't right. */
               put_C3210(
                   binop( Iop_And32,
                          binop(Iop_Shl32,
                                binop(Iop_CmpF64,
                                      get_ST(0),
                                      unop(Iop_I32StoF64,
                                           loadLE(Ity_I32,mkexpr(addr)))),
                                mkU8(8)),
                          mkU32(0x4500)
                   ));
               fp_pop();
               break;

            case 4: /* FISUB m32int */ /* ST(0) -= m32int */
               DIP("fisubl %s\n", dis_buf);
               fop = Iop_SubF64;
               goto do_fop_m32;

            case 5: /* FISUBR m32int */ /* ST(0) = m32int - ST(0) */
               DIP("fisubrl %s\n", dis_buf);
               fop = Iop_SubF64;
               goto do_foprev_m32;

            case 6: /* FIDIV m32int */ /* ST(0) /= m32int */
               DIP("fidivl %s\n", dis_buf);
               fop = Iop_DivF64;
               goto do_fop_m32;

            case 7: /* FIDIVR m32int */ /* ST(0) = m32int / ST(0) */
               DIP("fidivrl %s\n", dis_buf);
               fop = Iop_DivF64;
               goto do_foprev_m32;

            /* Shared tail: ST(0) = ST(0) `fop` (double)m32int. */
            do_fop_m32:
               put_ST_UNCHECKED(0,
                  triop(fop,
                        get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
                        get_ST(0),
                        unop(Iop_I32StoF64,
                             loadLE(Ity_I32, mkexpr(addr)))));
               break;

            /* Shared tail, operands reversed:
               ST(0) = (double)m32int `fop` ST(0). */
            do_foprev_m32:
               put_ST_UNCHECKED(0,
                  triop(fop,
                        get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
                        unop(Iop_I32StoF64,
                             loadLE(Ity_I32, mkexpr(addr))),
                        get_ST(0)));
               break;

            default:
               vex_printf("unhandled opc_aux = 0x%2x\n", gregOfRM(modrm));
               vex_printf("first_opcode == 0xDA\n");
               goto decode_fail;
         }

      } else {

         /* Register forms: FCMOVcc moves ST(i) into ST(0) when the
            guest condition holds, expressed as an IR ITE. */
         delta++;
         switch (modrm) {

            case 0xC0 ... 0xC7: /* FCMOVB ST(i), ST(0) */
               r_src = (UInt)modrm - 0xC0;
               DIP("fcmovb %%st(%d), %%st(0)\n", (Int)r_src);
               put_ST_UNCHECKED(0,
                                IRExpr_ITE(
                                    mk_x86g_calculate_condition(X86CondB),
                                    get_ST(r_src), get_ST(0)) );
               break;

            case 0xC8 ... 0xCF: /* FCMOVE(Z) ST(i), ST(0) */
               r_src = (UInt)modrm - 0xC8;
               DIP("fcmovz %%st(%d), %%st(0)\n", (Int)r_src);
               put_ST_UNCHECKED(0,
                                IRExpr_ITE(
                                    mk_x86g_calculate_condition(X86CondZ),
                                    get_ST(r_src), get_ST(0)) );
               break;

            case 0xD0 ... 0xD7: /* FCMOVBE ST(i), ST(0) */
               r_src = (UInt)modrm - 0xD0;
               DIP("fcmovbe %%st(%d), %%st(0)\n", (Int)r_src);
               put_ST_UNCHECKED(0,
                                IRExpr_ITE(
                                    mk_x86g_calculate_condition(X86CondBE),
                                    get_ST(r_src), get_ST(0)) );
               break;

            case 0xD8 ... 0xDF: /* FCMOVU ST(i), ST(0) */
               /* "Unordered" maps onto the parity flag (P). */
               r_src = (UInt)modrm - 0xD8;
               DIP("fcmovu %%st(%d), %%st(0)\n", (Int)r_src);
               put_ST_UNCHECKED(0,
                                IRExpr_ITE(
                                    mk_x86g_calculate_condition(X86CondP),
                                    get_ST(r_src), get_ST(0)) );
               break;

            case 0xE9: /* FUCOMPP %st(0),%st(1) */
               /* Compare, then pop both operands. */
               DIP("fucompp %%st(0),%%st(1)\n");
               /* This forces C1 to zero, which isn't right. */
               put_C3210(
                   binop( Iop_And32,
                          binop(Iop_Shl32,
                                binop(Iop_CmpF64, get_ST(0), get_ST(1)),
                                mkU8(8)),
                          mkU32(0x4500)
                   ));
               fp_pop();
               fp_pop();
               break;

            default:
               goto decode_fail;
         }

      }
   }
4693
   /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDB opcodes +-+-+-+-+-+-+-+ */
   else
   if (first_opcode == 0xDB) {
      if (modrm < 0xC0) {

         /* bits 5,4,3 are an opcode extension, and the modRM also
            specifies an address. */
         IRTemp addr = disAMode( &len, sorb, delta, dis_buf );
         delta += len;

         switch (gregOfRM(modrm)) {

            case 0: /* FILD m32int */
               /* Push m32int, converted to F64. */
               DIP("fildl %s\n", dis_buf);
               fp_push();
               put_ST(0, unop(Iop_I32StoF64,
                              loadLE(Ity_I32, mkexpr(addr))));
               break;

            case 1: /* FISTTPL m32 (SSE3) */
               /* Truncating store: rounding is fixed at Irrm_ZERO,
                  independent of the FPU control word. */
               DIP("fisttpl %s\n", dis_buf);
               storeLE( mkexpr(addr),
                        binop(Iop_F64toI32S, mkU32(Irrm_ZERO), get_ST(0)) );
               fp_pop();
               break;

            case 2: /* FIST m32 */
               /* Store using the current guest rounding mode. */
               DIP("fistl %s\n", dis_buf);
               storeLE( mkexpr(addr),
                        binop(Iop_F64toI32S, get_roundingmode(), get_ST(0)) );
               break;

            case 3: /* FISTP m32 */
               DIP("fistpl %s\n", dis_buf);
               storeLE( mkexpr(addr),
                        binop(Iop_F64toI32S, get_roundingmode(), get_ST(0)) );
               fp_pop();
               break;

            case 5: { /* FLD extended-real */
               /* Uses dirty helper:
                     ULong x86g_loadF80le ( UInt )
                  addr holds the address.  First, do a dirty call to
                  get hold of the data. */
               IRTemp   val  = newTemp(Ity_I64);
               IRExpr** args = mkIRExprVec_1 ( mkexpr(addr) );

               IRDirty* d = unsafeIRDirty_1_N (
                               val,
                               0/*regparms*/,
                               "x86g_dirtyhelper_loadF80le",
                               &x86g_dirtyhelper_loadF80le,
                               args
                            );
               /* declare that we're reading memory */
               d->mFx   = Ifx_Read;
               d->mAddr = mkexpr(addr);
               d->mSize = 10;

               /* execute the dirty call, dumping the result in val. */
               /* The helper returns the 80-bit value already rounded
                  to a 64-bit double's bit pattern. */
               stmt( IRStmt_Dirty(d) );
               fp_push();
               put_ST(0, unop(Iop_ReinterpI64asF64, mkexpr(val)));

               DIP("fldt %s\n", dis_buf);
               break;
            }
4761
4762 case 7: { /* FSTP extended-real */
4763 /* Uses dirty helper: void x86g_storeF80le ( UInt, ULong ) */
4764 IRExpr** args
4765 = mkIRExprVec_2( mkexpr(addr),
4766 unop(Iop_ReinterpF64asI64, get_ST(0)) );
4767
4768 IRDirty* d = unsafeIRDirty_0_N (
4769 0/*regparms*/,
4770 "x86g_dirtyhelper_storeF80le",
4771 &x86g_dirtyhelper_storeF80le,
4772 args
4773 );
4774 /* declare we're writing memory */
4775 d->mFx = Ifx_Write;
4776 d->mAddr = mkexpr(addr);
4777 d->mSize = 10;
4778
4779 /* execute the dirty call. */
4780 stmt( IRStmt_Dirty(d) );
4781 fp_pop();
4782
4783 DIP("fstpt\n %s", dis_buf);
4784 break;
4785 }
4786
            default:
               vex_printf("unhandled opc_aux = 0x%2x\n", gregOfRM(modrm));
               vex_printf("first_opcode == 0xDB\n");
               goto decode_fail;
         }

      } else {

         /* Register forms: negated FCMOVcc variants, FNCLEX, FNINIT,
            and the EFLAGS-writing compares FUCOMI/FCOMI. */
         delta++;
         switch (modrm) {

            case 0xC0 ... 0xC7: /* FCMOVNB ST(i), ST(0) */
               r_src = (UInt)modrm - 0xC0;
               DIP("fcmovnb %%st(%d), %%st(0)\n", (Int)r_src);
               put_ST_UNCHECKED(0,
                                IRExpr_ITE(
                                    mk_x86g_calculate_condition(X86CondNB),
                                    get_ST(r_src), get_ST(0)) );
               break;

            case 0xC8 ... 0xCF: /* FCMOVNE(NZ) ST(i), ST(0) */
               r_src = (UInt)modrm - 0xC8;
               DIP("fcmovnz %%st(%d), %%st(0)\n", (Int)r_src);
               put_ST_UNCHECKED(0,
                                IRExpr_ITE(
                                    mk_x86g_calculate_condition(X86CondNZ),
                                    get_ST(r_src), get_ST(0)) );
               break;

            case 0xD0 ... 0xD7: /* FCMOVNBE ST(i), ST(0) */
               r_src = (UInt)modrm - 0xD0;
               DIP("fcmovnbe %%st(%d), %%st(0)\n", (Int)r_src);
               put_ST_UNCHECKED(0,
                                IRExpr_ITE(
                                    mk_x86g_calculate_condition(X86CondNBE),
                                    get_ST(r_src), get_ST(0)) );
               break;

            case 0xD8 ... 0xDF: /* FCMOVNU ST(i), ST(0) */
               r_src = (UInt)modrm - 0xD8;
               DIP("fcmovnu %%st(%d), %%st(0)\n", (Int)r_src);
               put_ST_UNCHECKED(0,
                                IRExpr_ITE(
                                    mk_x86g_calculate_condition(X86CondNP),
                                    get_ST(r_src), get_ST(0)) );
               break;

            case 0xE2:
               /* FNCLEX: a no-op here, since FP exceptions are not
                  modelled (see the limitations note in the file
                  header); there is no exception state to clear. */
               DIP("fnclex\n");
               break;

            case 0xE3: {
               /* Uses dirty helper:
                     void x86g_do_FINIT ( VexGuestX86State* ) */
               IRDirty* d  = unsafeIRDirty_0_N (
                                0/*regparms*/,
                                "x86g_dirtyhelper_FINIT",
                                &x86g_dirtyhelper_FINIT,
                                mkIRExprVec_1(IRExpr_BBPTR())
                             );

               /* declare we're writing guest state: FINIT resets the
                  whole FPU, so all five FP state fields are written. */
               d->nFxState = 5;
               vex_bzero(&d->fxState, sizeof(d->fxState));

               d->fxState[0].fx = Ifx_Write;
               d->fxState[0].offset = OFFB_FTOP;
               d->fxState[0].size = sizeof(UInt);

               d->fxState[1].fx = Ifx_Write;
               d->fxState[1].offset = OFFB_FPREGS;
               d->fxState[1].size = 8 * sizeof(ULong);

               d->fxState[2].fx = Ifx_Write;
               d->fxState[2].offset = OFFB_FPTAGS;
               d->fxState[2].size = 8 * sizeof(UChar);

               d->fxState[3].fx = Ifx_Write;
               d->fxState[3].offset = OFFB_FPROUND;
               d->fxState[3].size = sizeof(UInt);

               d->fxState[4].fx = Ifx_Write;
               d->fxState[4].offset = OFFB_FC3210;
               d->fxState[4].size = sizeof(UInt);

               stmt( IRStmt_Dirty(d) );

               DIP("fninit\n");
               break;
            }

            case 0xE8 ... 0xEF: /* FUCOMI %st(0),%st(?) */
               /* Second arg False = don't pop (the popping forms live
                  under 0xDF). */
               fp_do_ucomi_ST0_STi( (UInt)modrm - 0xE8, False );
               break;

            case 0xF0 ... 0xF7: /* FCOMI %st(0),%st(?) */
               fp_do_ucomi_ST0_STi( (UInt)modrm - 0xF0, False );
               break;

            default:
               goto decode_fail;
         }
      }
   }
4891
   /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDC opcodes +-+-+-+-+-+-+-+ */
   else
   if (first_opcode == 0xDC) {
      if (modrm < 0xC0) {

         /* bits 5,4,3 are an opcode extension, and the modRM also
            specifies an address. */
         /* Like 0xD8 but with a double-real (m64) source: hence the
            True "dbl" flag on the helpers and no F32->F64 widening on
            the compares. */
         IRTemp addr = disAMode( &len, sorb, delta, dis_buf );
         delta += len;

         switch (gregOfRM(modrm)) {

            case 0: /* FADD double-real */
               fp_do_op_mem_ST_0 ( addr, "add", dis_buf, Iop_AddF64, True );
               break;

            case 1: /* FMUL double-real */
               fp_do_op_mem_ST_0 ( addr, "mul", dis_buf, Iop_MulF64, True );
               break;

            case 2: /* FCOM double-real */
               DIP("fcoml %s\n", dis_buf);
               /* This forces C1 to zero, which isn't right. */
               put_C3210(
                   binop( Iop_And32,
                          binop(Iop_Shl32,
                                binop(Iop_CmpF64,
                                      get_ST(0),
                                      loadLE(Ity_F64,mkexpr(addr))),
                                mkU8(8)),
                          mkU32(0x4500)
                   ));
               break;

            case 3: /* FCOMP double-real */
               DIP("fcompl %s\n", dis_buf);
               /* This forces C1 to zero, which isn't right. */
               put_C3210(
                   binop( Iop_And32,
                          binop(Iop_Shl32,
                                binop(Iop_CmpF64,
                                      get_ST(0),
                                      loadLE(Ity_F64,mkexpr(addr))),
                                mkU8(8)),
                          mkU32(0x4500)
                   ));
               fp_pop();
               break;

            case 4: /* FSUB double-real */
               fp_do_op_mem_ST_0 ( addr, "sub", dis_buf, Iop_SubF64, True );
               break;

            case 5: /* FSUBR double-real */
               fp_do_oprev_mem_ST_0 ( addr, "subr", dis_buf, Iop_SubF64, True );
               break;

            case 6: /* FDIV double-real */
               fp_do_op_mem_ST_0 ( addr, "div", dis_buf, Iop_DivF64, True );
               break;

            case 7: /* FDIVR double-real */
               fp_do_oprev_mem_ST_0 ( addr, "divr", dis_buf, Iop_DivF64, True );
               break;

            default:
               vex_printf("unhandled opc_aux = 0x%2x\n", gregOfRM(modrm));
               vex_printf("first_opcode == 0xDC\n");
               goto decode_fail;
         }

      } else {

         /* Register forms: unlike 0xD8, the DESTINATION is ST(i)
            (note the swapped 0/i argument order), and the sub/subr,
            div/divr encodings are interchanged accordingly. */
         delta++;
         switch (modrm) {

            case 0xC0 ... 0xC7: /* FADD %st(0),%st(?) */
               fp_do_op_ST_ST ( "add", Iop_AddF64, 0, modrm - 0xC0, False );
               break;

            case 0xC8 ... 0xCF: /* FMUL %st(0),%st(?) */
               fp_do_op_ST_ST ( "mul", Iop_MulF64, 0, modrm - 0xC8, False );
               break;

            case 0xE0 ... 0xE7: /* FSUBR %st(0),%st(?) */
               fp_do_oprev_ST_ST ( "subr", Iop_SubF64, 0, modrm - 0xE0, False );
               break;

            case 0xE8 ... 0xEF: /* FSUB %st(0),%st(?) */
               fp_do_op_ST_ST ( "sub", Iop_SubF64, 0, modrm - 0xE8, False );
               break;

            case 0xF0 ... 0xF7: /* FDIVR %st(0),%st(?) */
               fp_do_oprev_ST_ST ( "divr", Iop_DivF64, 0, modrm - 0xF0, False );
               break;

            case 0xF8 ... 0xFF: /* FDIV %st(0),%st(?) */
               fp_do_op_ST_ST ( "div", Iop_DivF64, 0, modrm - 0xF8, False );
               break;

            default:
               goto decode_fail;
         }

      }
   }
4998
   /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDD opcodes +-+-+-+-+-+-+-+ */
   else
   if (first_opcode == 0xDD) {

      if (modrm < 0xC0) {

         /* bits 5,4,3 are an opcode extension, and the modRM also
            specifies an address. */
         IRTemp addr = disAMode( &len, sorb, delta, dis_buf );
         delta += len;

         switch (gregOfRM(modrm)) {

            case 0: /* FLD double-real */
               /* Push m64 directly -- no conversion needed, our stack
                  slots are F64. */
               DIP("fldl %s\n", dis_buf);
               fp_push();
               put_ST(0, loadLE(Ity_F64, mkexpr(addr)));
               break;

5018 case 1: /* FISTTPQ m64 (SSE3) */
5019 DIP("fistppll %s\n", dis_buf);
5020 storeLE( mkexpr(addr),
5021 binop(Iop_F64toI64S, mkU32(Irrm_ZERO), get_ST(0)) );
5022 fp_pop();
5023 break;
5024
            case 2: /* FST double-real */
               DIP("fstl %s\n", dis_buf);
               storeLE(mkexpr(addr), get_ST(0));
               break;

            case 3: /* FSTP double-real */
               DIP("fstpl %s\n", dis_buf);
               storeLE(mkexpr(addr), get_ST(0));
               fp_pop();
               break;

            case 4: { /* FRSTOR m108 */
               /* Uses dirty helper:
                     VexEmNote x86g_do_FRSTOR ( VexGuestX86State*, Addr32 ) */
               IRTemp   ew = newTemp(Ity_I32);
               IRDirty* d  = unsafeIRDirty_0_N (
                                0/*regparms*/,
                                "x86g_dirtyhelper_FRSTOR",
                                &x86g_dirtyhelper_FRSTOR,
                                mkIRExprVec_2( IRExpr_BBPTR(), mkexpr(addr) )
                             );
               d->tmp   = ew;
               /* declare we're reading memory */
               d->mFx   = Ifx_Read;
               d->mAddr = mkexpr(addr);
               d->mSize = 108;

               /* declare we're writing guest state: the full FPU
                  image (top ptr, registers, tags, rounding, flags). */
               d->nFxState = 5;
               vex_bzero(&d->fxState, sizeof(d->fxState));

               d->fxState[0].fx = Ifx_Write;
               d->fxState[0].offset = OFFB_FTOP;
               d->fxState[0].size = sizeof(UInt);

               d->fxState[1].fx = Ifx_Write;
               d->fxState[1].offset = OFFB_FPREGS;
               d->fxState[1].size = 8 * sizeof(ULong);

               d->fxState[2].fx = Ifx_Write;
               d->fxState[2].offset = OFFB_FPTAGS;
               d->fxState[2].size = 8 * sizeof(UChar);

               d->fxState[3].fx = Ifx_Write;
               d->fxState[3].offset = OFFB_FPROUND;
               d->fxState[3].size = sizeof(UInt);

               d->fxState[4].fx = Ifx_Write;
               d->fxState[4].offset = OFFB_FC3210;
               d->fxState[4].size = sizeof(UInt);

               stmt( IRStmt_Dirty(d) );

               /* ew contains any emulation warning we may need to
                  issue.  If needed, side-exit to the next insn,
                  reporting the warning, so that Valgrind's dispatcher
                  sees the warning. */
               put_emwarn( mkexpr(ew) );
               stmt(
                  IRStmt_Exit(
                     binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)),
                     Ijk_EmWarn,
                     IRConst_U32( ((Addr32)guest_EIP_bbstart)+delta),
                     OFFB_EIP
                  )
               );

               DIP("frstor %s\n", dis_buf);
               break;
            }

            case 6: { /* FNSAVE m108 */
               /* Uses dirty helper:
                     void x86g_do_FSAVE ( VexGuestX86State*, UInt ) */
               /* Mirror image of FRSTOR: reads the same five guest
                  state fields and writes the 108-byte memory image. */
               IRDirty* d = unsafeIRDirty_0_N (
                               0/*regparms*/,
                               "x86g_dirtyhelper_FSAVE",
                               &x86g_dirtyhelper_FSAVE,
                               mkIRExprVec_2( IRExpr_BBPTR(), mkexpr(addr) )
                            );
               /* declare we're writing memory */
               d->mFx   = Ifx_Write;
               d->mAddr = mkexpr(addr);
               d->mSize = 108;

               /* declare we're reading guest state */
               d->nFxState = 5;
               vex_bzero(&d->fxState, sizeof(d->fxState));

               d->fxState[0].fx = Ifx_Read;
               d->fxState[0].offset = OFFB_FTOP;
               d->fxState[0].size = sizeof(UInt);

               d->fxState[1].fx = Ifx_Read;
               d->fxState[1].offset = OFFB_FPREGS;
               d->fxState[1].size = 8 * sizeof(ULong);

               d->fxState[2].fx = Ifx_Read;
               d->fxState[2].offset = OFFB_FPTAGS;
               d->fxState[2].size = 8 * sizeof(UChar);

               d->fxState[3].fx = Ifx_Read;
               d->fxState[3].offset = OFFB_FPROUND;
               d->fxState[3].size = sizeof(UInt);

               d->fxState[4].fx = Ifx_Read;
               d->fxState[4].offset = OFFB_FC3210;
               d->fxState[4].size = sizeof(UInt);

               stmt( IRStmt_Dirty(d) );

               DIP("fnsave %s\n", dis_buf);
               break;
            }

            case 7: { /* FNSTSW m16 */
               /* Store the synthesised 16-bit FPU status word. */
               IRExpr* sw = get_FPU_sw();
               vassert(typeOfIRExpr(irsb->tyenv, sw) == Ity_I16);
               storeLE( mkexpr(addr), sw );
               DIP("fnstsw %s\n", dis_buf);
               break;
            }

            default:
               vex_printf("unhandled opc_aux = 0x%2x\n", gregOfRM(modrm));
               vex_printf("first_opcode == 0xDD\n");
               goto decode_fail;
         }
      } else {
         delta++;
         switch (modrm) {

            case 0xC0 ... 0xC7: /* FFREE %st(?) */
               /* Mark the register empty by clearing its tag. */
               r_dst = (UInt)modrm - 0xC0;
               DIP("ffree %%st(%d)\n", (Int)r_dst);
               put_ST_TAG ( r_dst, mkU8(0) );
               break;

            case 0xD0 ... 0xD7: /* FST %st(0),%st(?) */
               r_dst = (UInt)modrm - 0xD0;
               DIP("fst %%st(0),%%st(%d)\n", (Int)r_dst);
               /* P4 manual says: "If the destination operand is a
                  non-empty register, the invalid-operation exception
                  is not generated.  Hence put_ST_UNCHECKED. */
               put_ST_UNCHECKED(r_dst, get_ST(0));
               break;

            case 0xD8 ... 0xDF: /* FSTP %st(0),%st(?) */
               r_dst = (UInt)modrm - 0xD8;
               DIP("fstp %%st(0),%%st(%d)\n", (Int)r_dst);
               /* P4 manual says: "If the destination operand is a
                  non-empty register, the invalid-operation exception
                  is not generated.  Hence put_ST_UNCHECKED. */
               put_ST_UNCHECKED(r_dst, get_ST(0));
               fp_pop();
               break;

            case 0xE0 ... 0xE7: /* FUCOM %st(0),%st(?) */
               r_dst = (UInt)modrm - 0xE0;
               DIP("fucom %%st(0),%%st(%d)\n", (Int)r_dst);
               /* This forces C1 to zero, which isn't right. */
               /* Same <<8 & 0x4500 compare-result-to-C3/C2/C0 mapping
                  as the FCOM cases under 0xD8. */
               put_C3210(
                   binop( Iop_And32,
                          binop(Iop_Shl32,
                                binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)),
                                mkU8(8)),
                          mkU32(0x4500)
                   ));
               break;
5194
5195 case 0xE8 ... 0xEF: /* FUCOMP %st(0),%st(?) */
5196 r_dst = (UInt)modrm - 0xE8;
5197 DIP("fucomp %%st(0),%%st(%d)\n", (Int)r_dst);
5198 /* This forces C1 to zero, which isn't right. */
5199 put_C3210(
5200 binop( Iop_And32,
5201 binop(Iop_Shl32,
5202 binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)),
5203 mkU8(8)),
5204 mkU32(0x4500)
5205 ));
5206 fp_pop();
5207 break;
5208
5209 default:
5210 goto decode_fail;
5211 }
5212 }
5213 }
5214
5215 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDE opcodes +-+-+-+-+-+-+-+ */
5216 else
5217 if (first_opcode == 0xDE) {
5218
5219 if (modrm < 0xC0) {
5220
5221 /* bits 5,4,3 are an opcode extension, and the modRM also
5222 specifies an address. */
5223 IROp fop;
5224 IRTemp addr = disAMode( &len, sorb, delta, dis_buf );
5225 delta += len;
5226
5227 switch (gregOfRM(modrm)) {
5228
5229 case 0: /* FIADD m16int */ /* ST(0) += m16int */
5230 DIP("fiaddw %s\n", dis_buf);
5231 fop = Iop_AddF64;
5232 goto do_fop_m16;
5233
5234 case 1: /* FIMUL m16int */ /* ST(0) *= m16int */
5235 DIP("fimulw %s\n", dis_buf);
5236 fop = Iop_MulF64;
5237 goto do_fop_m16;
5238
5239 case 2: /* FICOM m16int */
5240 DIP("ficomw %s\n", dis_buf);
5241 /* This forces C1 to zero, which isn't right. */
5242 put_C3210(
5243 binop( Iop_And32,
5244 binop(Iop_Shl32,
5245 binop(Iop_CmpF64,
5246 get_ST(0),
5247 unop(Iop_I32StoF64,
5248 unop(Iop_16Sto32,
5249 loadLE(Ity_I16,mkexpr(addr))))),
5250 mkU8(8)),
5251 mkU32(0x4500)
5252 ));
5253 break;
5254
5255 case 3: /* FICOMP m16int */
5256 DIP("ficompw %s\n", dis_buf);
5257 /* This forces C1 to zero, which isn't right. */
5258 put_C3210(
5259 binop( Iop_And32,
5260 binop(Iop_Shl32,
5261 binop(Iop_CmpF64,
5262 get_ST(0),
5263 unop(Iop_I32StoF64,
5264 unop(Iop_16Sto32,
5265 loadLE(Ity_I16,mkexpr(addr))))),
5266 mkU8(8)),
5267 mkU32(0x4500)
5268 ));
5269 fp_pop();
5270 break;
5271
5272 case 4: /* FISUB m16int */ /* ST(0) -= m16int */
5273 DIP("fisubw %s\n", dis_buf);
5274 fop = Iop_SubF64;
5275 goto do_fop_m16;
5276
5277 case 5: /* FISUBR m16int */ /* ST(0) = m16int - ST(0) */
5278 DIP("fisubrw %s\n", dis_buf);
5279 fop = Iop_SubF64;
5280 goto do_foprev_m16;
5281
5282 case 6: /* FIDIV m16int */ /* ST(0) /= m16int */
5283 DIP("fisubw %s\n", dis_buf);
5284 fop = Iop_DivF64;
5285 goto do_fop_m16;
5286
5287 case 7: /* FIDIVR m16int */ /* ST(0) = m16int / ST(0) */
5288 DIP("fidivrw %s\n", dis_buf);
5289 fop = Iop_DivF64;
5290 goto do_foprev_m16;
5291
5292 do_fop_m16:
5293 put_ST_UNCHECKED(0,
5294 triop(fop,
5295 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5296 get_ST(0),
5297 unop(Iop_I32StoF64,
5298 unop(Iop_16Sto32,
5299 loadLE(Ity_I16, mkexpr(addr))))));
5300 break;
5301
5302 do_foprev_m16:
5303 put_ST_UNCHECKED(0,
5304 triop(fop,
5305 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5306 unop(Iop_I32StoF64,
5307 unop(Iop_16Sto32,
5308 loadLE(Ity_I16, mkexpr(addr)))),
5309 get_ST(0)));
5310 break;
5311
5312 default:
5313 vex_printf("unhandled opc_aux = 0x%2x\n", gregOfRM(modrm));
5314 vex_printf("first_opcode == 0xDE\n");
5315 goto decode_fail;
5316 }
5317
5318 } else {
5319
5320 delta++;
5321 switch (modrm) {
5322
5323 case 0xC0 ... 0xC7: /* FADDP %st(0),%st(?) */
5324 fp_do_op_ST_ST ( "add", Iop_AddF64, 0, modrm - 0xC0, True );
5325 break;
5326
5327 case 0xC8 ... 0xCF: /* FMULP %st(0),%st(?) */
5328 fp_do_op_ST_ST ( "mul", Iop_MulF64, 0, modrm - 0xC8, True );
5329 break;
5330
5331 case 0xD9: /* FCOMPP %st(0),%st(1) */
5332 DIP("fuompp %%st(0),%%st(1)\n");
5333 /* This forces C1 to zero, which isn't right. */
5334 put_C3210(
5335 binop( Iop_And32,
5336 binop(Iop_Shl32,
5337 binop(Iop_CmpF64, get_ST(0), get_ST(1)),
5338 mkU8(8)),
5339 mkU32(0x4500)
5340 ));
5341 fp_pop();
5342 fp_pop();
5343 break;
5344
5345 case 0xE0 ... 0xE7: /* FSUBRP %st(0),%st(?) */
5346 fp_do_oprev_ST_ST ( "subr", Iop_SubF64, 0, modrm - 0xE0, True );
5347 break;
5348
5349 case 0xE8 ... 0xEF: /* FSUBP %st(0),%st(?) */
5350 fp_do_op_ST_ST ( "sub", Iop_SubF64, 0, modrm - 0xE8, True );
5351 break;
5352
5353 case 0xF0 ... 0xF7: /* FDIVRP %st(0),%st(?) */
5354 fp_do_oprev_ST_ST ( "divr", Iop_DivF64, 0, modrm - 0xF0, True );
5355 break;
5356
5357 case 0xF8 ... 0xFF: /* FDIVP %st(0),%st(?) */
5358 fp_do_op_ST_ST ( "div", Iop_DivF64, 0, modrm - 0xF8, True );
5359 break;
5360
5361 default:
5362 goto decode_fail;
5363 }
5364
5365 }
5366 }
5367
5368 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDF opcodes +-+-+-+-+-+-+-+ */
5369 else
5370 if (first_opcode == 0xDF) {
5371
5372 if (modrm < 0xC0) {
5373
5374 /* bits 5,4,3 are an opcode extension, and the modRM also
5375 specifies an address. */
5376 IRTemp addr = disAMode( &len, sorb, delta, dis_buf );
5377 delta += len;
5378
5379 switch (gregOfRM(modrm)) {
5380
5381 case 0: /* FILD m16int */
5382 DIP("fildw %s\n", dis_buf);
5383 fp_push();
5384 put_ST(0, unop(Iop_I32StoF64,
5385 unop(Iop_16Sto32,
5386 loadLE(Ity_I16, mkexpr(addr)))));
5387 break;
5388
5389 case 1: /* FISTTPS m16 (SSE3) */
5390 DIP("fisttps %s\n", dis_buf);
5391 storeLE( mkexpr(addr),
5392 binop(Iop_F64toI16S, mkU32(Irrm_ZERO), get_ST(0)) );
5393 fp_pop();
5394 break;
5395
5396 case 2: /* FIST m16 */
5397 DIP("fistp %s\n", dis_buf);
5398 storeLE( mkexpr(addr),
5399 binop(Iop_F64toI16S, get_roundingmode(), get_ST(0)) );
5400 break;
5401
5402 case 3: /* FISTP m16 */
5403 DIP("fistps %s\n", dis_buf);
5404 storeLE( mkexpr(addr),
5405 binop(Iop_F64toI16S, get_roundingmode(), get_ST(0)) );
5406 fp_pop();
5407 break;
5408
5409 case 5: /* FILD m64 */
5410 DIP("fildll %s\n", dis_buf);
5411 fp_push();
5412 put_ST(0, binop(Iop_I64StoF64,
5413 get_roundingmode(),
5414 loadLE(Ity_I64, mkexpr(addr))));
5415 break;
5416
5417 case 7: /* FISTP m64 */
5418 DIP("fistpll %s\n", dis_buf);
5419 storeLE( mkexpr(addr),
5420 binop(Iop_F64toI64S, get_roundingmode(), get_ST(0)) );
5421 fp_pop();
5422 break;
5423
5424 default:
5425 vex_printf("unhandled opc_aux = 0x%2x\n", gregOfRM(modrm));
5426 vex_printf("first_opcode == 0xDF\n");
5427 goto decode_fail;
5428 }
5429
5430 } else {
5431
5432 delta++;
5433 switch (modrm) {
5434
5435 case 0xC0: /* FFREEP %st(0) */
5436 DIP("ffreep %%st(%d)\n", 0);
5437 put_ST_TAG ( 0, mkU8(0) );
5438 fp_pop();
5439 break;
5440
5441 case 0xE0: /* FNSTSW %ax */
5442 DIP("fnstsw %%ax\n");
5443 /* Get the FPU status word value and dump it in %AX. */
5444 if (0) {
5445 /* The obvious thing to do is simply dump the 16-bit
5446 status word value in %AX. However, due to a
5447 limitation in Memcheck's origin tracking
5448 machinery, this causes Memcheck not to track the
5449 origin of any undefinedness into %AH (only into
5450 %AL/%AX/%EAX), which means origins are lost in
5451 the sequence "fnstsw %ax; test $M,%ah; jcond .." */
5452 putIReg(2, R_EAX, get_FPU_sw());
5453 } else {
5454 /* So a somewhat lame kludge is to make it very
5455 clear to Memcheck that the value is written to
5456 both %AH and %AL. This generates marginally
5457 worse code, but I don't think it matters much. */
5458 IRTemp t16 = newTemp(Ity_I16);
5459 assign(t16, get_FPU_sw());
5460 putIReg( 1, R_AL, unop(Iop_16to8, mkexpr(t16)) );
5461 putIReg( 1, R_AH, unop(Iop_16HIto8, mkexpr(t16)) );
5462 }
5463 break;
5464
5465 case 0xE8 ... 0xEF: /* FUCOMIP %st(0),%st(?) */
5466 fp_do_ucomi_ST0_STi( (UInt)modrm - 0xE8, True );
5467 break;
5468
5469 case 0xF0 ... 0xF7: /* FCOMIP %st(0),%st(?) */
5470 /* not really right since COMIP != UCOMIP */
5471 fp_do_ucomi_ST0_STi( (UInt)modrm - 0xF0, True );
5472 break;
5473
5474 default:
5475 goto decode_fail;
5476 }
5477 }
5478
5479 }
5480
5481 else
5482 vpanic("dis_FPU(x86): invalid primary opcode");
5483
5484 *decode_ok = True;
5485 return delta;
5486
5487 decode_fail:
5488 *decode_ok = False;
5489 return delta;
5490 }
5491
5492
5493 /*------------------------------------------------------------*/
5494 /*--- ---*/
5495 /*--- MMX INSTRUCTIONS ---*/
5496 /*--- ---*/
5497 /*------------------------------------------------------------*/
5498
5499 /* Effect of MMX insns on x87 FPU state (table 11-2 of
5500 IA32 arch manual, volume 3):
5501
5502 Read from, or write to MMX register (viz, any insn except EMMS):
5503 * All tags set to Valid (non-empty) -- FPTAGS[i] := nonzero
5504 * FP stack pointer set to zero
5505
5506 EMMS:
5507 * All tags set to Invalid (empty) -- FPTAGS[i] := zero
5508 * FP stack pointer set to zero
5509 */
5510
do_MMX_preamble(void)5511 static void do_MMX_preamble ( void )
5512 {
5513 Int i;
5514 IRRegArray* descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 );
5515 IRExpr* zero = mkU32(0);
5516 IRExpr* tag1 = mkU8(1);
5517 put_ftop(zero);
5518 for (i = 0; i < 8; i++)
5519 stmt( IRStmt_PutI( mkIRPutI(descr, zero, i, tag1) ) );
5520 }
5521
do_EMMS_preamble(void)5522 static void do_EMMS_preamble ( void )
5523 {
5524 Int i;
5525 IRRegArray* descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 );
5526 IRExpr* zero = mkU32(0);
5527 IRExpr* tag0 = mkU8(0);
5528 put_ftop(zero);
5529 for (i = 0; i < 8; i++)
5530 stmt( IRStmt_PutI( mkIRPutI(descr, zero, i, tag0) ) );
5531 }
5532
5533
getMMXReg(UInt archreg)5534 static IRExpr* getMMXReg ( UInt archreg )
5535 {
5536 vassert(archreg < 8);
5537 return IRExpr_Get( OFFB_FPREGS + 8 * archreg, Ity_I64 );
5538 }
5539
5540
/* Write 64-bit expression |e| to MMX register |archreg| (0 .. 7),
   i.e. into the corresponding x87 register slot. */
static void putMMXReg ( UInt archreg, IRExpr* e )
{
   Int offset;
   vassert(archreg < 8);
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I64);
   offset = OFFB_FPREGS + 8 * archreg;
   stmt( IRStmt_Put( offset, e ) );
}
5547
5548
/* Helper for non-shift MMX insns. Note this is incomplete in the
   sense that it does not first call do_MMX_preamble() -- that is the
   responsibility of its caller. */

static
UInt dis_MMXop_regmem_to_reg ( UChar sorb,
                               Int delta,
                               UChar opc,
                               const HChar* name,
                               Bool show_granularity )
{
   /* Decode one binary MMX op of the form "op E, G": combine G (an
      MMX register) with E (an MMX register or 64-bit memory operand)
      and write the result back to G.  |opc| selects the operation,
      |sorb| is any segment-override prefix, and |delta| points at the
      modRM byte.  Returns the updated |delta|.  |name| and
      |show_granularity| affect only the disassembly printout. */
   HChar   dis_buf[50];
   UChar   modrm = getIByte(delta);
   Bool    isReg = epartIsReg(modrm);
   IRExpr* argL  = NULL;
   IRExpr* argR  = NULL;
   IRExpr* argG  = NULL;
   IRExpr* argE  = NULL;
   IRTemp  res   = newTemp(Ity_I64);

   Bool    invG  = False;        /* complement G before use (PANDN) */
   IROp    op    = Iop_INVALID;  /* IROp implementing the insn, if any */
   void*   hAddr = NULL;         /* clean helper implementing it, if any */
   Bool    eLeft = False;        /* pass E as the first (left) operand */
   const HChar* hName = NULL;

   /* Route the operation through a clean helper call instead of an
      IROp. */
#  define XXX(_name) do { hAddr = &_name; hName = #_name; } while (0)

   switch (opc) {
      /* Original MMX ones */
      case 0xFC: op = Iop_Add8x8; break;
      case 0xFD: op = Iop_Add16x4; break;
      case 0xFE: op = Iop_Add32x2; break;

      case 0xEC: op = Iop_QAdd8Sx8; break;
      case 0xED: op = Iop_QAdd16Sx4; break;

      case 0xDC: op = Iop_QAdd8Ux8; break;
      case 0xDD: op = Iop_QAdd16Ux4; break;

      case 0xF8: op = Iop_Sub8x8;  break;
      case 0xF9: op = Iop_Sub16x4; break;
      case 0xFA: op = Iop_Sub32x2; break;

      case 0xE8: op = Iop_QSub8Sx8; break;
      case 0xE9: op = Iop_QSub16Sx4; break;

      case 0xD8: op = Iop_QSub8Ux8; break;
      case 0xD9: op = Iop_QSub16Ux4; break;

      case 0xE5: op = Iop_MulHi16Sx4; break;
      case 0xD5: op = Iop_Mul16x4; break;
      case 0xF5: XXX(x86g_calculate_mmx_pmaddwd); break;

      case 0x74: op = Iop_CmpEQ8x8; break;
      case 0x75: op = Iop_CmpEQ16x4; break;
      case 0x76: op = Iop_CmpEQ32x2; break;

      case 0x64: op = Iop_CmpGT8Sx8; break;
      case 0x65: op = Iop_CmpGT16Sx4; break;
      case 0x66: op = Iop_CmpGT32Sx2; break;

      /* Pack/unpack insns take E as the first operand (eLeft). */
      case 0x6B: op = Iop_QNarrowBin32Sto16Sx4; eLeft = True; break;
      case 0x63: op = Iop_QNarrowBin16Sto8Sx8;  eLeft = True; break;
      case 0x67: op = Iop_QNarrowBin16Sto8Ux8;  eLeft = True; break;

      case 0x68: op = Iop_InterleaveHI8x8;  eLeft = True; break;
      case 0x69: op = Iop_InterleaveHI16x4; eLeft = True; break;
      case 0x6A: op = Iop_InterleaveHI32x2; eLeft = True; break;

      case 0x60: op = Iop_InterleaveLO8x8;  eLeft = True; break;
      case 0x61: op = Iop_InterleaveLO16x4; eLeft = True; break;
      case 0x62: op = Iop_InterleaveLO32x2; eLeft = True; break;

      case 0xDB: op = Iop_And64; break;
      case 0xDF: op = Iop_And64; invG = True; break;
      case 0xEB: op = Iop_Or64; break;
      case 0xEF: /* Possibly do better here if argL and argR are the
                    same reg */
                 op = Iop_Xor64; break;

      /* Introduced in SSE1 */
      case 0xE0: op = Iop_Avg8Ux8;    break;
      case 0xE3: op = Iop_Avg16Ux4;   break;
      case 0xEE: op = Iop_Max16Sx4;   break;
      case 0xDE: op = Iop_Max8Ux8;    break;
      case 0xEA: op = Iop_Min16Sx4;   break;
      case 0xDA: op = Iop_Min8Ux8;    break;
      case 0xE4: op = Iop_MulHi16Ux4; break;
      case 0xF6: XXX(x86g_calculate_mmx_psadbw); break;

      /* Introduced in SSE2 */
      case 0xD4: op = Iop_Add64; break;
      case 0xFB: op = Iop_Sub64; break;

      default:
         vex_printf("\n0x%x\n", (Int)opc);
         vpanic("dis_MMXop_regmem_to_reg");
   }

#  undef XXX

   argG = getMMXReg(gregOfRM(modrm));
   if (invG)
      argG = unop(Iop_Not64, argG);

   /* Fetch E: either a register or a 64-bit load. */
   if (isReg) {
      delta++;
      argE = getMMXReg(eregOfRM(modrm));
   } else {
      Int    len;
      IRTemp addr = disAMode( &len, sorb, delta, dis_buf );
      delta += len;
      argE = loadLE(Ity_I64, mkexpr(addr));
   }

   /* Select operand order; pack/interleave ops want E on the left. */
   if (eLeft) {
      argL = argE;
      argR = argG;
   } else {
      argL = argG;
      argR = argE;
   }

   /* Exactly one of (op, hAddr/hName) must have been selected. */
   if (op != Iop_INVALID) {
      vassert(hName == NULL);
      vassert(hAddr == NULL);
      assign(res, binop(op, argL, argR));
   } else {
      vassert(hName != NULL);
      vassert(hAddr != NULL);
      assign( res,
              mkIRExprCCall(
                 Ity_I64,
                 0/*regparms*/, hName, hAddr,
                 mkIRExprVec_2( argL, argR )
              )
            );
   }

   putMMXReg( gregOfRM(modrm), mkexpr(res) );

   DIP("%s%s %s, %s\n",
       name, show_granularity ? nameMMXGran(opc & 3) : "",
       ( isReg ? nameMMXReg(eregOfRM(modrm)) : dis_buf ),
       nameMMXReg(gregOfRM(modrm)) );

   return delta;
}
5698
5699
5700 /* Vector by scalar shift of G by the amount specified at the bottom
5701 of E. This is a straight copy of dis_SSE_shiftG_byE. */
5702
dis_MMX_shiftG_byE(UChar sorb,Int delta,const HChar * opname,IROp op)5703 static UInt dis_MMX_shiftG_byE ( UChar sorb, Int delta,
5704 const HChar* opname, IROp op )
5705 {
5706 HChar dis_buf[50];
5707 Int alen, size;
5708 IRTemp addr;
5709 Bool shl, shr, sar;
5710 UChar rm = getIByte(delta);
5711 IRTemp g0 = newTemp(Ity_I64);
5712 IRTemp g1 = newTemp(Ity_I64);
5713 IRTemp amt = newTemp(Ity_I32);
5714 IRTemp amt8 = newTemp(Ity_I8);
5715
5716 if (epartIsReg(rm)) {
5717 assign( amt, unop(Iop_64to32, getMMXReg(eregOfRM(rm))) );
5718 DIP("%s %s,%s\n", opname,
5719 nameMMXReg(eregOfRM(rm)),
5720 nameMMXReg(gregOfRM(rm)) );
5721 delta++;
5722 } else {
5723 addr = disAMode ( &alen, sorb, delta, dis_buf );
5724 assign( amt, loadLE(Ity_I32, mkexpr(addr)) );
5725 DIP("%s %s,%s\n", opname,
5726 dis_buf,
5727 nameMMXReg(gregOfRM(rm)) );
5728 delta += alen;
5729 }
5730 assign( g0, getMMXReg(gregOfRM(rm)) );
5731 assign( amt8, unop(Iop_32to8, mkexpr(amt)) );
5732
5733 shl = shr = sar = False;
5734 size = 0;
5735 switch (op) {
5736 case Iop_ShlN16x4: shl = True; size = 32; break;
5737 case Iop_ShlN32x2: shl = True; size = 32; break;
5738 case Iop_Shl64: shl = True; size = 64; break;
5739 case Iop_ShrN16x4: shr = True; size = 16; break;
5740 case Iop_ShrN32x2: shr = True; size = 32; break;
5741 case Iop_Shr64: shr = True; size = 64; break;
5742 case Iop_SarN16x4: sar = True; size = 16; break;
5743 case Iop_SarN32x2: sar = True; size = 32; break;
5744 default: vassert(0);
5745 }
5746
5747 if (shl || shr) {
5748 assign(
5749 g1,
5750 IRExpr_ITE(
5751 binop(Iop_CmpLT32U,mkexpr(amt),mkU32(size)),
5752 binop(op, mkexpr(g0), mkexpr(amt8)),
5753 mkU64(0)
5754 )
5755 );
5756 } else
5757 if (sar) {
5758 assign(
5759 g1,
5760 IRExpr_ITE(
5761 binop(Iop_CmpLT32U,mkexpr(amt),mkU32(size)),
5762 binop(op, mkexpr(g0), mkexpr(amt8)),
5763 binop(op, mkexpr(g0), mkU8(size-1))
5764 )
5765 );
5766 } else {
5767 /*NOTREACHED*/
5768 vassert(0);
5769 }
5770
5771 putMMXReg( gregOfRM(rm), mkexpr(g1) );
5772 return delta;
5773 }
5774
5775
5776 /* Vector by scalar shift of E by an immediate byte. This is a
5777 straight copy of dis_SSE_shiftE_imm. */
5778
5779 static
dis_MMX_shiftE_imm(Int delta,const HChar * opname,IROp op)5780 UInt dis_MMX_shiftE_imm ( Int delta, const HChar* opname, IROp op )
5781 {
5782 Bool shl, shr, sar;
5783 UChar rm = getIByte(delta);
5784 IRTemp e0 = newTemp(Ity_I64);
5785 IRTemp e1 = newTemp(Ity_I64);
5786 UChar amt, size;
5787 vassert(epartIsReg(rm));
5788 vassert(gregOfRM(rm) == 2
5789 || gregOfRM(rm) == 4 || gregOfRM(rm) == 6);
5790 amt = getIByte(delta+1);
5791 delta += 2;
5792 DIP("%s $%d,%s\n", opname,
5793 (Int)amt,
5794 nameMMXReg(eregOfRM(rm)) );
5795
5796 assign( e0, getMMXReg(eregOfRM(rm)) );
5797
5798 shl = shr = sar = False;
5799 size = 0;
5800 switch (op) {
5801 case Iop_ShlN16x4: shl = True; size = 16; break;
5802 case Iop_ShlN32x2: shl = True; size = 32; break;
5803 case Iop_Shl64: shl = True; size = 64; break;
5804 case Iop_SarN16x4: sar = True; size = 16; break;
5805 case Iop_SarN32x2: sar = True; size = 32; break;
5806 case Iop_ShrN16x4: shr = True; size = 16; break;
5807 case Iop_ShrN32x2: shr = True; size = 32; break;
5808 case Iop_Shr64: shr = True; size = 64; break;
5809 default: vassert(0);
5810 }
5811
5812 if (shl || shr) {
5813 assign( e1, amt >= size
5814 ? mkU64(0)
5815 : binop(op, mkexpr(e0), mkU8(amt))
5816 );
5817 } else
5818 if (sar) {
5819 assign( e1, amt >= size
5820 ? binop(op, mkexpr(e0), mkU8(size-1))
5821 : binop(op, mkexpr(e0), mkU8(amt))
5822 );
5823 } else {
5824 /*NOTREACHED*/
5825 vassert(0);
5826 }
5827
5828 putMMXReg( eregOfRM(rm), mkexpr(e1) );
5829 return delta;
5830 }
5831
5832
/* Completely handle all MMX instructions except emms. */

static
UInt dis_MMX ( Bool* decode_ok, UChar sorb, Int sz, Int delta )
{
   /* Decode one MMX instruction whose first opcode byte is at
      |delta|.  |sorb| is any segment-override prefix; |sz| is the
      operand size implied by prefixes and must be 4 for everything
      handled here.  On success sets *decode_ok to True and returns
      the delta of the following instruction; on failure sets
      *decode_ok to False, in which case the returned delta is to be
      ignored. */
   Int   len;
   UChar modrm;
   HChar dis_buf[50];
   UChar opc = getIByte(delta);
   delta++;

   /* dis_MMX handles all insns except emms. */
   do_MMX_preamble();

   switch (opc) {

      case 0x6E:
         /* MOVD (src)ireg-or-mem (E), (dst)mmxreg (G)*/
         if (sz != 4)
            goto mmx_decode_failure;
         modrm = getIByte(delta);
         if (epartIsReg(modrm)) {
            delta++;
            /* Zero-extend the 32-bit source into the 64-bit MMX reg. */
            putMMXReg(
               gregOfRM(modrm),
               binop( Iop_32HLto64,
                      mkU32(0),
                      getIReg(4, eregOfRM(modrm)) ) );
            DIP("movd %s, %s\n",
                nameIReg(4,eregOfRM(modrm)), nameMMXReg(gregOfRM(modrm)));
         } else {
            IRTemp addr = disAMode( &len, sorb, delta, dis_buf );
            delta += len;
            putMMXReg(
               gregOfRM(modrm),
               binop( Iop_32HLto64,
                      mkU32(0),
                      loadLE(Ity_I32, mkexpr(addr)) ) );
            DIP("movd %s, %s\n", dis_buf, nameMMXReg(gregOfRM(modrm)));
         }
         break;

      case 0x7E: /* MOVD (src)mmxreg (G), (dst)ireg-or-mem (E) */
         if (sz != 4)
            goto mmx_decode_failure;
         modrm = getIByte(delta);
         if (epartIsReg(modrm)) {
            delta++;
            putIReg( 4, eregOfRM(modrm),
                     unop(Iop_64to32, getMMXReg(gregOfRM(modrm)) ) );
            DIP("movd %s, %s\n",
                nameMMXReg(gregOfRM(modrm)), nameIReg(4,eregOfRM(modrm)));
         } else {
            IRTemp addr = disAMode( &len, sorb, delta, dis_buf );
            delta += len;
            storeLE( mkexpr(addr),
                     unop(Iop_64to32, getMMXReg(gregOfRM(modrm)) ) );
            DIP("movd %s, %s\n", nameMMXReg(gregOfRM(modrm)), dis_buf);
         }
         break;

      case 0x6F:
         /* MOVQ (src)mmxreg-or-mem, (dst)mmxreg */
         if (sz != 4)
            goto mmx_decode_failure;
         modrm = getIByte(delta);
         if (epartIsReg(modrm)) {
            delta++;
            putMMXReg( gregOfRM(modrm), getMMXReg(eregOfRM(modrm)) );
            DIP("movq %s, %s\n",
                nameMMXReg(eregOfRM(modrm)), nameMMXReg(gregOfRM(modrm)));
         } else {
            IRTemp addr = disAMode( &len, sorb, delta, dis_buf );
            delta += len;
            putMMXReg( gregOfRM(modrm), loadLE(Ity_I64, mkexpr(addr)) );
            DIP("movq %s, %s\n",
                dis_buf, nameMMXReg(gregOfRM(modrm)));
         }
         break;

      case 0x7F:
         /* MOVQ (src)mmxreg, (dst)mmxreg-or-mem */
         if (sz != 4)
            goto mmx_decode_failure;
         modrm = getIByte(delta);
         if (epartIsReg(modrm)) {
            delta++;
            putMMXReg( eregOfRM(modrm), getMMXReg(gregOfRM(modrm)) );
            DIP("movq %s, %s\n",
                nameMMXReg(gregOfRM(modrm)), nameMMXReg(eregOfRM(modrm)));
         } else {
            /* Also used for MOVNTQ; the non-temporal hint is ignored
               and it is treated as a plain store. */
            IRTemp addr = disAMode( &len, sorb, delta, dis_buf );
            delta += len;
            storeLE( mkexpr(addr), getMMXReg(gregOfRM(modrm)) );
            DIP("mov(nt)q %s, %s\n",
                nameMMXReg(gregOfRM(modrm)), dis_buf);
         }
         break;

      /* The arithmetic/logic group: operand fetch, operation and
         writeback are all delegated to dis_MMXop_regmem_to_reg. */
      case 0xFC:
      case 0xFD:
      case 0xFE: /* PADDgg (src)mmxreg-or-mem, (dst)mmxreg */
         if (sz != 4)
            goto mmx_decode_failure;
         delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "padd", True );
         break;

      case 0xEC:
      case 0xED: /* PADDSgg (src)mmxreg-or-mem, (dst)mmxreg */
         if (sz != 4)
            goto mmx_decode_failure;
         delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "padds", True );
         break;

      case 0xDC:
      case 0xDD: /* PADDUSgg (src)mmxreg-or-mem, (dst)mmxreg */
         if (sz != 4)
            goto mmx_decode_failure;
         delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "paddus", True );
         break;

      case 0xF8:
      case 0xF9:
      case 0xFA: /* PSUBgg (src)mmxreg-or-mem, (dst)mmxreg */
         if (sz != 4)
            goto mmx_decode_failure;
         delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "psub", True );
         break;

      case 0xE8:
      case 0xE9: /* PSUBSgg (src)mmxreg-or-mem, (dst)mmxreg */
         if (sz != 4)
            goto mmx_decode_failure;
         delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "psubs", True );
         break;

      case 0xD8:
      case 0xD9: /* PSUBUSgg (src)mmxreg-or-mem, (dst)mmxreg */
         if (sz != 4)
            goto mmx_decode_failure;
         delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "psubus", True );
         break;

      case 0xE5: /* PMULHW (src)mmxreg-or-mem, (dst)mmxreg */
         if (sz != 4)
            goto mmx_decode_failure;
         delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "pmulhw", False );
         break;

      case 0xD5: /* PMULLW (src)mmxreg-or-mem, (dst)mmxreg */
         if (sz != 4)
            goto mmx_decode_failure;
         delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "pmullw", False );
         break;

      case 0xF5: /* PMADDWD (src)mmxreg-or-mem, (dst)mmxreg */
         /* NOTE(review): unlike the surrounding cases this asserts on
            sz != 4 rather than falling through to decode failure. */
         vassert(sz == 4);
         delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "pmaddwd", False );
         break;

      case 0x74:
      case 0x75:
      case 0x76: /* PCMPEQgg (src)mmxreg-or-mem, (dst)mmxreg */
         if (sz != 4)
            goto mmx_decode_failure;
         delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "pcmpeq", True );
         break;

      case 0x64:
      case 0x65:
      case 0x66: /* PCMPGTgg (src)mmxreg-or-mem, (dst)mmxreg */
         if (sz != 4)
            goto mmx_decode_failure;
         delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "pcmpgt", True );
         break;

      case 0x6B: /* PACKSSDW (src)mmxreg-or-mem, (dst)mmxreg */
         if (sz != 4)
            goto mmx_decode_failure;
         delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "packssdw", False );
         break;

      case 0x63: /* PACKSSWB (src)mmxreg-or-mem, (dst)mmxreg */
         if (sz != 4)
            goto mmx_decode_failure;
         delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "packsswb", False );
         break;

      case 0x67: /* PACKUSWB (src)mmxreg-or-mem, (dst)mmxreg */
         if (sz != 4)
            goto mmx_decode_failure;
         delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "packuswb", False );
         break;

      case 0x68:
      case 0x69:
      case 0x6A: /* PUNPCKHgg (src)mmxreg-or-mem, (dst)mmxreg */
         if (sz != 4)
            goto mmx_decode_failure;
         delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "punpckh", True );
         break;

      case 0x60:
      case 0x61:
      case 0x62: /* PUNPCKLgg (src)mmxreg-or-mem, (dst)mmxreg */
         if (sz != 4)
            goto mmx_decode_failure;
         delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "punpckl", True );
         break;

      case 0xDB: /* PAND (src)mmxreg-or-mem, (dst)mmxreg */
         if (sz != 4)
            goto mmx_decode_failure;
         delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "pand", False );
         break;

      case 0xDF: /* PANDN (src)mmxreg-or-mem, (dst)mmxreg */
         if (sz != 4)
            goto mmx_decode_failure;
         delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "pandn", False );
         break;

      case 0xEB: /* POR (src)mmxreg-or-mem, (dst)mmxreg */
         if (sz != 4)
            goto mmx_decode_failure;
         delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "por", False );
         break;

      case 0xEF: /* PXOR (src)mmxreg-or-mem, (dst)mmxreg */
         if (sz != 4)
            goto mmx_decode_failure;
         delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "pxor", False );
         break;

      /* Shifts where the amount comes from a register or memory. */
#     define SHIFT_BY_REG(_name,_op)                                 \
                delta = dis_MMX_shiftG_byE(sorb, delta, _name, _op); \
                break;

      /* PSLLgg (src)mmxreg-or-mem, (dst)mmxreg */
      case 0xF1: SHIFT_BY_REG("psllw", Iop_ShlN16x4);
      case 0xF2: SHIFT_BY_REG("pslld", Iop_ShlN32x2);
      case 0xF3: SHIFT_BY_REG("psllq", Iop_Shl64);

      /* PSRLgg (src)mmxreg-or-mem, (dst)mmxreg */
      case 0xD1: SHIFT_BY_REG("psrlw", Iop_ShrN16x4);
      case 0xD2: SHIFT_BY_REG("psrld", Iop_ShrN32x2);
      case 0xD3: SHIFT_BY_REG("psrlq", Iop_Shr64);

      /* PSRAgg (src)mmxreg-or-mem, (dst)mmxreg */
      case 0xE1: SHIFT_BY_REG("psraw", Iop_SarN16x4);
      case 0xE2: SHIFT_BY_REG("psrad", Iop_SarN32x2);

#     undef SHIFT_BY_REG

      case 0x71:
      case 0x72:
      case 0x73: {
         /* (sz==4): PSLLgg/PSRAgg/PSRLgg mmxreg by imm8.  The shift
            direction is encoded in the reg field of the modRM byte. */
         UChar byte2, subopc;
         if (sz != 4)
            goto mmx_decode_failure;
         byte2  = getIByte(delta);           /* amode / sub-opcode */
         subopc = toUChar( (byte2 >> 3) & 7 );

#        define SHIFT_BY_IMM(_name,_op)                          \
            do { delta = dis_MMX_shiftE_imm(delta,_name,_op);    \
            } while (0)

              if (subopc == 2 /*SRL*/ && opc == 0x71)
                  SHIFT_BY_IMM("psrlw", Iop_ShrN16x4);
         else if (subopc == 2 /*SRL*/ && opc == 0x72)
                  SHIFT_BY_IMM("psrld", Iop_ShrN32x2);
         else if (subopc == 2 /*SRL*/ && opc == 0x73)
                  SHIFT_BY_IMM("psrlq", Iop_Shr64);

         else if (subopc == 4 /*SAR*/ && opc == 0x71)
                  SHIFT_BY_IMM("psraw", Iop_SarN16x4);
         else if (subopc == 4 /*SAR*/ && opc == 0x72)
                  SHIFT_BY_IMM("psrad", Iop_SarN32x2);

         else if (subopc == 6 /*SHL*/ && opc == 0x71)
                  SHIFT_BY_IMM("psllw", Iop_ShlN16x4);
         else if (subopc == 6 /*SHL*/ && opc == 0x72)
                  SHIFT_BY_IMM("pslld", Iop_ShlN32x2);
         else if (subopc == 6 /*SHL*/ && opc == 0x73)
                  SHIFT_BY_IMM("psllq", Iop_Shl64);

         else goto mmx_decode_failure;

#        undef SHIFT_BY_IMM
         break;
      }

      case 0xF7: {
         /* MASKMOVQ: byte-wise conditional store of G to [EDI]
            (honouring any segment override); each byte of G is
            written only where the corresponding byte of E has its
            top bit set.  Implemented as a read-modify-write of the
            full 64-bit destination. */
         IRTemp addr    = newTemp(Ity_I32);
         IRTemp regD    = newTemp(Ity_I64);
         IRTemp regM    = newTemp(Ity_I64);
         IRTemp mask    = newTemp(Ity_I64);
         IRTemp olddata = newTemp(Ity_I64);
         IRTemp newdata = newTemp(Ity_I64);

         modrm = getIByte(delta);
         if (sz != 4 || (!epartIsReg(modrm)))
            goto mmx_decode_failure;
         delta++;

         assign( addr, handleSegOverride( sorb, getIReg(4, R_EDI) ));
         assign( regM, getMMXReg( eregOfRM(modrm) ));
         assign( regD, getMMXReg( gregOfRM(modrm) ));
         /* Replicate each byte's top bit across the byte, giving a
            0x00/0xFF per-byte write mask. */
         assign( mask, binop(Iop_SarN8x8, mkexpr(regM), mkU8(7)) );
         assign( olddata, loadLE( Ity_I64, mkexpr(addr) ));
         assign( newdata,
                 binop(Iop_Or64,
                       binop(Iop_And64,
                             mkexpr(regD),
                             mkexpr(mask) ),
                       binop(Iop_And64,
                             mkexpr(olddata),
                             unop(Iop_Not64, mkexpr(mask)))) );
         storeLE( mkexpr(addr), mkexpr(newdata) );
         DIP("maskmovq %s,%s\n", nameMMXReg( eregOfRM(modrm) ),
                                 nameMMXReg( gregOfRM(modrm) ) );
         break;
      }

      /* --- MMX decode failure --- */
      default:
      mmx_decode_failure:
         *decode_ok = False;
         return delta; /* ignored */

   }

   *decode_ok = True;
   return delta;
}
6169
6170
6171 /*------------------------------------------------------------*/
6172 /*--- More misc arithmetic and other obscure insns. ---*/
6173 /*------------------------------------------------------------*/
6174
/* Double length left and right shifts. Apparently only required in
   v-size (no b- variant). */
static
UInt dis_SHLRD_Gv_Ev ( UChar sorb,
                       Int delta, UChar modrm,
                       Int sz,
                       IRExpr* shift_amt,
                       Bool amt_is_literal,
                       const HChar* shift_amt_txt,
                       Bool left_shift )
{
   /* shift_amt :: Ity_I8 is the amount to shift.  shift_amt_txt is used
      for printing it.  And eip on entry points at the modrm byte.

      Implements SHLD/SHRD Gv, Ev: E (register or memory) is shifted
      by |shift_amt|, with vacated bit positions filled from G.  G is
      unchanged.  If |amt_is_literal| the amount came from an imm8
      byte following the amode, which is consumed here.  Returns the
      updated delta. */
   Int len;
   HChar dis_buf[50];

   IRType ty       = szToITy(sz);
   IRTemp gsrc     = newTemp(ty);
   IRTemp esrc     = newTemp(ty);
   IRTemp addr     = IRTemp_INVALID;
   IRTemp tmpSH    = newTemp(Ity_I8);
   IRTemp tmpL     = IRTemp_INVALID;
   IRTemp tmpRes   = IRTemp_INVALID;
   IRTemp tmpSubSh = IRTemp_INVALID;
   IROp   mkpair;
   IROp   getres;
   IROp   shift;
   IRExpr* mask    = NULL;

   vassert(sz == 2 || sz == 4);

   /* The E-part is the destination; this is shifted. The G-part
      supplies bits to be shifted into the E-part, but is not
      changed.

      If shifting left, form a double-length word with E at the top
      and G at the bottom, and shift this left. The result is then in
      the high part.

      If shifting right, form a double-length word with G at the top
      and E at the bottom, and shift this right. The result is then
      at the bottom. */

   /* Fetch the operands. */

   assign( gsrc, getIReg(sz, gregOfRM(modrm)) );

   if (epartIsReg(modrm)) {
      delta++;
      assign( esrc, getIReg(sz, eregOfRM(modrm)) );
      DIP("sh%cd%c %s, %s, %s\n",
          ( left_shift ? 'l' : 'r' ), nameISize(sz),
          shift_amt_txt,
          nameIReg(sz, gregOfRM(modrm)), nameIReg(sz, eregOfRM(modrm)));
   } else {
      addr = disAMode ( &len, sorb, delta, dis_buf );
      delta += len;
      assign( esrc, loadLE(ty, mkexpr(addr)) );
      DIP("sh%cd%c %s, %s, %s\n",
          ( left_shift ? 'l' : 'r' ), nameISize(sz),
          shift_amt_txt,
          nameIReg(sz, gregOfRM(modrm)), dis_buf);
   }

   /* Round up the relevant primops. */

   if (sz == 4) {
      tmpL     = newTemp(Ity_I64);
      tmpRes   = newTemp(Ity_I32);
      tmpSubSh = newTemp(Ity_I32);
      mkpair   = Iop_32HLto64;
      getres   = left_shift ? Iop_64HIto32 : Iop_64to32;
      shift    = left_shift ? Iop_Shl64 : Iop_Shr64;
      mask     = mkU8(31);
   } else {
      /* sz == 2 */
      tmpL     = newTemp(Ity_I32);
      tmpRes   = newTemp(Ity_I16);
      tmpSubSh = newTemp(Ity_I16);
      mkpair   = Iop_16HLto32;
      getres   = left_shift ? Iop_32HIto16 : Iop_32to16;
      shift    = left_shift ? Iop_Shl32 : Iop_Shr32;
      mask     = mkU8(15);
   }

   /* Do the shift, calculate the subshift value, and set
      the flag thunk. */

   /* Mask the amount down to the operand-width range. */
   assign( tmpSH, binop(Iop_And8, shift_amt, mask) );

   if (left_shift)
      assign( tmpL, binop(mkpair, mkexpr(esrc), mkexpr(gsrc)) );
   else
      assign( tmpL, binop(mkpair, mkexpr(gsrc), mkexpr(esrc)) );

   assign( tmpRes, unop(getres, binop(shift, mkexpr(tmpL), mkexpr(tmpSH)) ) );
   /* tmpSubSh = result of shifting by one less than tmpSH; the flag
      thunk uses it to recover the last bit shifted out (CF). */
   assign( tmpSubSh,
           unop(getres,
                binop(shift,
                      mkexpr(tmpL),
                      binop(Iop_And8,
                            binop(Iop_Sub8, mkexpr(tmpSH), mkU8(1) ),
                            mask))) );

   /* NOTE(review): the right-shift case is tagged Iop_Sar32, not
      Iop_Shr32 -- presumably the flag thunk treats both right shifts
      identically for the flags SHRD needs; confirm against
      setFlags_DEP1_DEP2_shift. */
   setFlags_DEP1_DEP2_shift ( left_shift ? Iop_Shl32 : Iop_Sar32,
                              tmpRes, tmpSubSh, ty, tmpSH );

   /* Put result back. */

   if (epartIsReg(modrm)) {
      putIReg(sz, eregOfRM(modrm), mkexpr(tmpRes));
   } else {
      storeLE( mkexpr(addr), mkexpr(tmpRes) );
   }

   /* If the amount was an immediate, skip over the imm8 byte, which
      disAMode did not account for. */
   if (amt_is_literal) delta++;
   return delta;
}
6293
6294
6295 /* Handle BT/BTS/BTR/BTC Gv, Ev. Apparently b-size is not
6296 required. */
6297
6298 typedef enum { BtOpNone, BtOpSet, BtOpReset, BtOpComp } BtOp;
6299
nameBtOp(BtOp op)6300 static const HChar* nameBtOp ( BtOp op )
6301 {
6302 switch (op) {
6303 case BtOpNone: return "";
6304 case BtOpSet: return "s";
6305 case BtOpReset: return "r";
6306 case BtOpComp: return "c";
6307 default: vpanic("nameBtOp(x86)");
6308 }
6309 }
6310
6311
/* Handle BT/BTS/BTR/BTC G(reg), E(reg-or-mem).  When E is a register,
   its value is temporarily written to the client's stack (128 bytes
   below ESP) so that a single byte-granularity memory path serves both
   the register and memory forms.  Returns the updated delta. */
static
UInt dis_bt_G_E ( VexAbiInfo* vbi,
                  UChar sorb, Bool locked, Int sz, Int delta, BtOp op )
{
   HChar dis_buf[50];
   UChar modrm;
   Int len;
   IRTemp t_fetched, t_bitno0, t_bitno1, t_bitno2, t_addr0,
          t_addr1, t_esp, t_mask, t_new;

   vassert(sz == 2 || sz == 4);

   t_fetched = t_bitno0 = t_bitno1 = t_bitno2
             = t_addr0 = t_addr1 = t_esp
             = t_mask = t_new = IRTemp_INVALID;

   t_fetched = newTemp(Ity_I8);   /* the byte containing the selected bit */
   t_new     = newTemp(Ity_I8);   /* that byte after modification (BTS/BTR/BTC) */
   t_bitno0  = newTemp(Ity_I32);  /* raw bit number from G */
   t_bitno1  = newTemp(Ity_I32);  /* bit number, masked if E is a register */
   t_bitno2  = newTemp(Ity_I8);   /* bit offset within the addressed byte */
   t_addr1   = newTemp(Ity_I32);  /* address of the byte holding the bit */
   modrm = getIByte(delta);

   assign( t_bitno0, widenSto32(getIReg(sz, gregOfRM(modrm))) );

   if (epartIsReg(modrm)) {
      delta++;
      /* Get it onto the client's stack. */
      t_esp = newTemp(Ity_I32);
      t_addr0 = newTemp(Ity_I32);

      /* For the choice of the value 128, see comment in dis_bt_G_E in
         guest_amd64_toIR.c.  We point out here only that 128 is
         fast-cased in Memcheck and is > 0, so seems like a good
         choice. */
      vassert(vbi->guest_stack_redzone_size == 0);
      assign( t_esp, binop(Iop_Sub32, getIReg(4, R_ESP), mkU32(128)) );
      putIReg(4, R_ESP, mkexpr(t_esp));

      storeLE( mkexpr(t_esp), getIReg(sz, eregOfRM(modrm)) );

      /* Make t_addr0 point at it. */
      assign( t_addr0, mkexpr(t_esp) );

      /* Mask out upper bits of the shift amount, since we're doing a
         reg. */
      assign( t_bitno1, binop(Iop_And32,
                              mkexpr(t_bitno0),
                              mkU32(sz == 4 ? 31 : 15)) );

   } else {
      t_addr0 = disAMode ( &len, sorb, delta, dis_buf );
      delta += len;
      assign( t_bitno1, mkexpr(t_bitno0) );
   }

   /* At this point: t_addr0 is the address being operated on.  If it
      was a reg, we will have pushed it onto the client's stack.
      t_bitno1 is the bit number, suitably masked in the case of a
      reg. */

   /* Now the main sequence. */
   /* Note the arithmetic (signed) shift: for the memory form the bit
      index may be negative, addressing bytes below t_addr0. */
   assign( t_addr1,
           binop(Iop_Add32,
                 mkexpr(t_addr0),
                 binop(Iop_Sar32, mkexpr(t_bitno1), mkU8(3))) );

   /* t_addr1 now holds effective address */

   assign( t_bitno2,
           unop(Iop_32to8,
                binop(Iop_And32, mkexpr(t_bitno1), mkU32(7))) );

   /* t_bitno2 contains offset of bit within byte */

   if (op != BtOpNone) {
      t_mask = newTemp(Ity_I8);
      assign( t_mask, binop(Iop_Shl8, mkU8(1), mkexpr(t_bitno2)) );
   }

   /* t_mask is now a suitable byte mask */

   assign( t_fetched, loadLE(Ity_I8, mkexpr(t_addr1)) );

   if (op != BtOpNone) {
      switch (op) {
         case BtOpSet:
            assign( t_new,
                    binop(Iop_Or8, mkexpr(t_fetched), mkexpr(t_mask)) );
            break;
         case BtOpComp:
            assign( t_new,
                    binop(Iop_Xor8, mkexpr(t_fetched), mkexpr(t_mask)) );
            break;
         case BtOpReset:
            assign( t_new,
                    binop(Iop_And8, mkexpr(t_fetched),
                                    unop(Iop_Not8, mkexpr(t_mask))) );
            break;
         default:
            vpanic("dis_bt_G_E(x86)");
      }
      /* A LOCK prefix only matters for the genuine memory form; the
         register form went through the client stack and needs no
         atomicity. */
      if (locked && !epartIsReg(modrm)) {
         casLE( mkexpr(t_addr1), mkexpr(t_fetched)/*expd*/,
                                 mkexpr(t_new)/*new*/,
                                 guest_EIP_curr_instr );
      } else {
         storeLE( mkexpr(t_addr1), mkexpr(t_new) );
      }
   }

   /* Side effect done; now get selected bit into Carry flag */
   /* Flags: C=selected bit, O,S,Z,A,P undefined, so are set to zero. */
   stmt( IRStmt_Put( OFFB_CC_OP,   mkU32(X86G_CC_OP_COPY) ));
   stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) ));
   stmt( IRStmt_Put(
            OFFB_CC_DEP1,
            binop(Iop_And32,
                  binop(Iop_Shr32,
                        unop(Iop_8Uto32, mkexpr(t_fetched)),
                        mkexpr(t_bitno2)),
                  mkU32(1)))
       );
   /* Set NDEP even though it isn't used.  This makes redundant-PUT
      elimination of previous stores to this field work better. */
   stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));

   /* Move reg operand from stack back to reg */
   if (epartIsReg(modrm)) {
      /* t_esp still points at it. */
      putIReg(sz, eregOfRM(modrm), loadLE(szToITy(sz), mkexpr(t_esp)) );
      putIReg(4, R_ESP, binop(Iop_Add32, mkexpr(t_esp), mkU32(128)) );
   }

   DIP("bt%s%c %s, %s\n",
       nameBtOp(op), nameISize(sz), nameIReg(sz, gregOfRM(modrm)),
       ( epartIsReg(modrm) ? nameIReg(sz, eregOfRM(modrm)) : dis_buf ) );

   return delta;
}
6453
6454
6455
/* Handle BSF/BSR.  Only v-size seems necessary.
   fwds==True means BSF (scan from bit 0 upwards, Ctz);
   fwds==False means BSR (scan from the top downwards, 31-Clz).
   When the source is zero the destination register is left unchanged
   (VIA Nehemiah behaviour -- see comment below). */
static
UInt dis_bs_E_G ( UChar sorb, Int sz, Int delta, Bool fwds )
{
   Bool isReg;
   UChar modrm;
   HChar dis_buf[50];

   IRType ty = szToITy(sz);
   IRTemp src = newTemp(ty);
   IRTemp dst = newTemp(ty);

   IRTemp src32 = newTemp(Ity_I32);  /* src widened to 32 bits */
   IRTemp dst32 = newTemp(Ity_I32);  /* 32-bit result, before narrowing */
   IRTemp srcB  = newTemp(Ity_I1);   /* True iff src != 0 */

   vassert(sz == 4 || sz == 2);

   modrm = getIByte(delta);

   isReg = epartIsReg(modrm);
   if (isReg) {
      delta++;
      assign( src, getIReg(sz, eregOfRM(modrm)) );
   } else {
      Int len;
      IRTemp addr = disAMode( &len, sorb, delta, dis_buf );
      delta += len;
      assign( src, loadLE(ty, mkexpr(addr)) );
   }

   DIP("bs%c%c %s, %s\n",
       fwds ? 'f' : 'r', nameISize(sz),
       ( isReg ? nameIReg(sz, eregOfRM(modrm)) : dis_buf ),
       nameIReg(sz, gregOfRM(modrm)));

   /* Generate a bool expression which is zero iff the original is
      zero, and nonzero otherwise.  Ask for a CmpNE version which, if
      instrumented by Memcheck, is instrumented expensively, since
      this may be used on the output of a preceding movmskb insn,
      which has been known to be partially defined, and in need of
      careful handling. */
   assign( srcB, binop(mkSizedOp(ty,Iop_ExpCmpNE8),
                       mkexpr(src), mkU(ty,0)) );

   /* Flags: Z is 1 iff source value is zero.  All others
      are undefined -- we force them to zero. */
   stmt( IRStmt_Put( OFFB_CC_OP,   mkU32(X86G_CC_OP_COPY) ));
   stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) ));
   stmt( IRStmt_Put(
            OFFB_CC_DEP1,
            IRExpr_ITE( mkexpr(srcB),
                        /* src!=0 */
                        mkU32(0),
                        /* src==0 */
                        mkU32(X86G_CC_MASK_Z)
                        )
       ));
   /* Set NDEP even though it isn't used.  This makes redundant-PUT
      elimination of previous stores to this field work better. */
   stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));

   /* Result: iff source value is zero, we can't use
      Iop_Clz32/Iop_Ctz32 as they have no defined result in that case.
      But anyway, Intel x86 semantics say the result is undefined in
      such situations.  Hence handle the zero case specially. */

   /* Bleh.  What we compute:

          bsf32:  if src == 0 then 0 else  Ctz32(src)
          bsr32:  if src == 0 then 0 else  31 - Clz32(src)

          bsf16:  if src == 0 then 0 else  Ctz32(16Uto32(src))
          bsr16:  if src == 0 then 0 else  31 - Clz32(16Uto32(src))

      First, widen src to 32 bits if it is not already.

      Postscript 15 Oct 04: it seems that at least VIA Nehemiah leaves the
      dst register unchanged when src == 0.  Hence change accordingly.
   */
   if (sz == 2)
      assign( src32, unop(Iop_16Uto32, mkexpr(src)) );
   else
      assign( src32, mkexpr(src) );

   /* The main computation, guarding against zero. */
   assign( dst32,
           IRExpr_ITE(
              mkexpr(srcB),
              /* src != 0 */
              fwds ? unop(Iop_Ctz32, mkexpr(src32))
                   : binop(Iop_Sub32,
                           mkU32(31),
                           unop(Iop_Clz32, mkexpr(src32))),
              /* src == 0 -- leave dst unchanged */
              widenUto32( getIReg( sz, gregOfRM(modrm) ) )
           )
         );

   if (sz == 2)
      assign( dst, unop(Iop_32to16, mkexpr(dst32)) );
   else
      assign( dst, mkexpr(dst32) );

   /* dump result back */
   putIReg( sz, gregOfRM(modrm), mkexpr(dst) );

   return delta;
}
6565
6566
6567 static
codegen_xchg_eAX_Reg(Int sz,Int reg)6568 void codegen_xchg_eAX_Reg ( Int sz, Int reg )
6569 {
6570 IRType ty = szToITy(sz);
6571 IRTemp t1 = newTemp(ty);
6572 IRTemp t2 = newTemp(ty);
6573 vassert(sz == 2 || sz == 4);
6574 assign( t1, getIReg(sz, R_EAX) );
6575 assign( t2, getIReg(sz, reg) );
6576 putIReg( sz, R_EAX, mkexpr(t2) );
6577 putIReg( sz, reg, mkexpr(t1) );
6578 DIP("xchg%c %s, %s\n",
6579 nameISize(sz), nameIReg(sz, R_EAX), nameIReg(sz, reg));
6580 }
6581
6582
/* Emit IR for SAHF: load S,Z,A,P,C from %AH into the flag thunk,
   preserving the existing O flag. */
static
void codegen_SAHF ( void )
{
   /* Set the flags to:
      (x86g_calculate_flags_all() & X86G_CC_MASK_O)  -- retain the old O flag
      | (%AH & (X86G_CC_MASK_S|X86G_CC_MASK_Z|X86G_CC_MASK_A
                |X86G_CC_MASK_P|X86G_CC_MASK_C)
   */
   UInt mask_SZACP = X86G_CC_MASK_S|X86G_CC_MASK_Z|X86G_CC_MASK_A
                     |X86G_CC_MASK_C|X86G_CC_MASK_P;
   IRTemp oldflags = newTemp(Ity_I32);
   /* Must materialise the old flags BEFORE overwriting the thunk
      fields below, since O is carried over from them. */
   assign( oldflags, mk_x86g_calculate_eflags_all() );
   stmt( IRStmt_Put( OFFB_CC_OP,   mkU32(X86G_CC_OP_COPY) ));
   stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
   stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) ));
   /* DEP1 = (old flags & O) | (AH & SZACP); AH is bits 8..15 of EAX. */
   stmt( IRStmt_Put( OFFB_CC_DEP1,
         binop(Iop_Or32,
               binop(Iop_And32, mkexpr(oldflags), mkU32(X86G_CC_MASK_O)),
               binop(Iop_And32,
                     binop(Iop_Shr32, getIReg(4, R_EAX), mkU8(8)),
                     mkU32(mask_SZACP))
              )
   ));
   /* Set NDEP even though it isn't used.  This makes redundant-PUT
      elimination of previous stores to this field work better. */
   stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
}
6610
6611
6612 static
codegen_LAHF(void)6613 void codegen_LAHF ( void )
6614 {
6615 /* AH <- EFLAGS(SF:ZF:0:AF:0:PF:1:CF) */
6616 IRExpr* eax_with_hole;
6617 IRExpr* new_byte;
6618 IRExpr* new_eax;
6619 UInt mask_SZACP = X86G_CC_MASK_S|X86G_CC_MASK_Z|X86G_CC_MASK_A
6620 |X86G_CC_MASK_C|X86G_CC_MASK_P;
6621
6622 IRTemp flags = newTemp(Ity_I32);
6623 assign( flags, mk_x86g_calculate_eflags_all() );
6624
6625 eax_with_hole
6626 = binop(Iop_And32, getIReg(4, R_EAX), mkU32(0xFFFF00FF));
6627 new_byte
6628 = binop(Iop_Or32, binop(Iop_And32, mkexpr(flags), mkU32(mask_SZACP)),
6629 mkU32(1<<1));
6630 new_eax
6631 = binop(Iop_Or32, eax_with_hole,
6632 binop(Iop_Shl32, new_byte, mkU8(8)));
6633 putIReg(4, R_EAX, new_eax);
6634 }
6635
6636
/* Handle CMPXCHG G(reg), E(reg-or-mem) at operand size `size`.
   Compares %eAX with E; if equal, E := G, else %eAX := E.  Flags are
   set as for the comparison (SUB).  Returns updated delta0. */
static
UInt dis_cmpxchg_G_E ( UChar       sorb,
                       Bool        locked,
                       Int         size,
                       Int         delta0 )
{
   HChar dis_buf[50];
   Int   len;

   IRType ty    = szToITy(size);
   IRTemp acc   = newTemp(ty);      /* accumulator (%eAX), the expected value */
   IRTemp src   = newTemp(ty);      /* G: value to install on success */
   IRTemp dest  = newTemp(ty);      /* old value of E */
   IRTemp dest2 = newTemp(ty);      /* new value of E */
   IRTemp acc2  = newTemp(ty);      /* new value of %eAX */
   IRTemp cond  = newTemp(Ity_I1);  /* True iff acc == dest */
   IRTemp addr  = IRTemp_INVALID;
   UChar  rm    = getUChar(delta0);

   /* There are 3 cases to consider:

      reg-reg: ignore any lock prefix, generate sequence based
               on ITE

      reg-mem, not locked: ignore any lock prefix, generate sequence
                           based on ITE

      reg-mem, locked: use IRCAS
   */
   if (epartIsReg(rm)) {
      /* case 1 */
      assign( dest, getIReg(size, eregOfRM(rm)) );
      delta0++;
      assign( src, getIReg(size, gregOfRM(rm)) );
      assign( acc, getIReg(size, R_EAX) );
      setFlags_DEP1_DEP2(Iop_Sub8, acc, dest, ty);
      assign( cond, mk_x86g_calculate_condition(X86CondZ) );
      assign( dest2, IRExpr_ITE(mkexpr(cond), mkexpr(src), mkexpr(dest)) );
      assign( acc2,  IRExpr_ITE(mkexpr(cond), mkexpr(acc), mkexpr(dest)) );
      putIReg(size, R_EAX, mkexpr(acc2));
      putIReg(size, eregOfRM(rm), mkexpr(dest2));
      DIP("cmpxchg%c %s,%s\n", nameISize(size),
                               nameIReg(size,gregOfRM(rm)),
                               nameIReg(size,eregOfRM(rm)) );
   }
   else if (!epartIsReg(rm) && !locked) {
      /* case 2 */
      addr = disAMode ( &len, sorb, delta0, dis_buf );
      assign( dest, loadLE(ty, mkexpr(addr)) );
      delta0 += len;
      assign( src, getIReg(size, gregOfRM(rm)) );
      assign( acc, getIReg(size, R_EAX) );
      setFlags_DEP1_DEP2(Iop_Sub8, acc, dest, ty);
      assign( cond, mk_x86g_calculate_condition(X86CondZ) );
      assign( dest2, IRExpr_ITE(mkexpr(cond), mkexpr(src), mkexpr(dest)) );
      assign( acc2,  IRExpr_ITE(mkexpr(cond), mkexpr(acc), mkexpr(dest)) );
      putIReg(size, R_EAX, mkexpr(acc2));
      storeLE( mkexpr(addr), mkexpr(dest2) );
      DIP("cmpxchg%c %s,%s\n", nameISize(size),
                               nameIReg(size,gregOfRM(rm)), dis_buf);
   }
   else if (!epartIsReg(rm) && locked) {
      /* case 3 */
      /* src is new value.  acc is expected value.  dest is old value.
         Compute success from the output of the IRCAS, and steer the
         new value for EAX accordingly: in case of success, EAX is
         unchanged. */
      addr = disAMode ( &len, sorb, delta0, dis_buf );
      delta0 += len;
      assign( src, getIReg(size, gregOfRM(rm)) );
      assign( acc, getIReg(size, R_EAX) );
      stmt( IRStmt_CAS(
         mkIRCAS( IRTemp_INVALID, dest, Iend_LE, mkexpr(addr),
                  NULL, mkexpr(acc), NULL, mkexpr(src) )
      ));
      setFlags_DEP1_DEP2(Iop_Sub8, acc, dest, ty);
      assign( cond, mk_x86g_calculate_condition(X86CondZ) );
      assign( acc2,  IRExpr_ITE(mkexpr(cond), mkexpr(acc), mkexpr(dest)) );
      putIReg(size, R_EAX, mkexpr(acc2));
      DIP("cmpxchg%c %s,%s\n", nameISize(size),
                               nameIReg(size,gregOfRM(rm)), dis_buf);
   }
   else vassert(0);

   return delta0;
}
6723
6724
/* Handle conditional move instructions of the form
      cmovcc E(reg-or-mem), G(reg)

   E(src) is reg-or-mem
   G(dst) is reg.

   If E is reg, -->    GET %E, tmps
                       GET %G, tmpd
                       CMOVcc tmps, tmpd
                       PUT tmpd, %G

   If E is mem  -->    (getAddr E) -> tmpa
                       LD (tmpa), tmps
                       GET %G, tmpd
                       CMOVcc tmps, tmpd
                       PUT tmpd, %G

   Returns the updated delta0. */
static
UInt dis_cmov_E_G ( UChar       sorb,
                    Int         sz,
                    X86Condcode cond,
                    Int         delta0 )
{
   UChar rm = getIByte(delta0);
   HChar dis_buf[50];
   Int   len;

   IRType ty   = szToITy(sz);
   IRTemp tmps = newTemp(ty);   /* the E (source) value */
   IRTemp tmpd = newTemp(ty);   /* the current G (destination) value */

   if (epartIsReg(rm)) {
      assign( tmps, getIReg(sz, eregOfRM(rm)) );
      assign( tmpd, getIReg(sz, gregOfRM(rm)) );

      /* G := cond ? E : G.  The ITE keeps G's old value on a false
         condition, so the register is always written. */
      putIReg(sz, gregOfRM(rm),
                  IRExpr_ITE( mk_x86g_calculate_condition(cond),
                              mkexpr(tmps),
                              mkexpr(tmpd) )
             );
      DIP("cmov%c%s %s,%s\n", nameISize(sz),
                              name_X86Condcode(cond),
                              nameIReg(sz,eregOfRM(rm)),
                              nameIReg(sz,gregOfRM(rm)));
      return 1+delta0;
   }

   /* E refers to memory */
   /* NOTE(review): the load happens unconditionally, regardless of the
      condition -- matches the IR pattern used here; confirm against the
      architectural memory-access semantics if this matters. */
   {
      IRTemp addr = disAMode ( &len, sorb, delta0, dis_buf );
      assign( tmps, loadLE(ty, mkexpr(addr)) );
      assign( tmpd, getIReg(sz, gregOfRM(rm)) );

      putIReg(sz, gregOfRM(rm),
                  IRExpr_ITE( mk_x86g_calculate_condition(cond),
                              mkexpr(tmps),
                              mkexpr(tmpd) )
             );

      DIP("cmov%c%s %s,%s\n", nameISize(sz),
                              name_X86Condcode(cond),
                              dis_buf,
                              nameIReg(sz,gregOfRM(rm)));
      return len+delta0;
   }
}
6791
6792
/* Handle XADD G(reg), E(reg-or-mem): E and G are exchanged, and the
   sum of the two old values is written to E.  Sets *decodeOK on every
   path that returns.  Returns the updated delta0. */
static
UInt dis_xadd_G_E ( UChar sorb, Bool locked, Int sz, Int delta0,
                    Bool* decodeOK )
{
   Int   len;
   UChar rm = getIByte(delta0);
   HChar dis_buf[50];

   IRType ty    = szToITy(sz);
   IRTemp tmpd  = newTemp(ty);   /* old value of E */
   IRTemp tmpt0 = newTemp(ty);   /* old value of G */
   IRTemp tmpt1 = newTemp(ty);   /* sum, the new value of E */

   /* There are 3 cases to consider:

      reg-reg: ignore any lock prefix,
               generate 'naive' (non-atomic) sequence

      reg-mem, not locked: ignore any lock prefix, generate 'naive'
                           (non-atomic) sequence

      reg-mem, locked: use IRCAS
   */

   if (epartIsReg(rm)) {
      /* case 1 */
      assign( tmpd,  getIReg(sz, eregOfRM(rm)));
      assign( tmpt0, getIReg(sz, gregOfRM(rm)) );
      assign( tmpt1, binop(mkSizedOp(ty,Iop_Add8),
                           mkexpr(tmpd), mkexpr(tmpt0)) );
      setFlags_DEP1_DEP2( Iop_Add8, tmpd, tmpt0, ty );
      putIReg(sz, eregOfRM(rm), mkexpr(tmpt1));
      putIReg(sz, gregOfRM(rm), mkexpr(tmpd));
      DIP("xadd%c %s, %s\n",
          nameISize(sz), nameIReg(sz,gregOfRM(rm)),
                         nameIReg(sz,eregOfRM(rm)));
      *decodeOK = True;
      return 1+delta0;
   }
   else if (!epartIsReg(rm) && !locked) {
      /* case 2 */
      IRTemp addr = disAMode ( &len, sorb, delta0, dis_buf );
      assign( tmpd,  loadLE(ty, mkexpr(addr)) );
      assign( tmpt0, getIReg(sz, gregOfRM(rm)) );
      assign( tmpt1, binop(mkSizedOp(ty,Iop_Add8),
                           mkexpr(tmpd), mkexpr(tmpt0)) );
      storeLE( mkexpr(addr), mkexpr(tmpt1) );
      setFlags_DEP1_DEP2( Iop_Add8, tmpd, tmpt0, ty );
      putIReg(sz, gregOfRM(rm), mkexpr(tmpd));
      DIP("xadd%c %s, %s\n",
          nameISize(sz), nameIReg(sz,gregOfRM(rm)), dis_buf);
      *decodeOK = True;
      return len+delta0;
   }
   else if (!epartIsReg(rm) && locked) {
      /* case 3 */
      IRTemp addr = disAMode ( &len, sorb, delta0, dis_buf );
      assign( tmpd,  loadLE(ty, mkexpr(addr)) );
      assign( tmpt0, getIReg(sz, gregOfRM(rm)) );
      assign( tmpt1, binop(mkSizedOp(ty,Iop_Add8),
                           mkexpr(tmpd), mkexpr(tmpt0)) );
      /* The CAS fails (and restarts the instruction) if memory changed
         between the load and the store. */
      casLE( mkexpr(addr), mkexpr(tmpd)/*expVal*/,
             mkexpr(tmpt1)/*newVal*/, guest_EIP_curr_instr );
      setFlags_DEP1_DEP2( Iop_Add8, tmpd, tmpt0, ty );
      putIReg(sz, gregOfRM(rm), mkexpr(tmpd));
      DIP("xadd%c %s, %s\n",
          nameISize(sz), nameIReg(sz,gregOfRM(rm)), dis_buf);
      *decodeOK = True;
      return len+delta0;
   }
   /*UNREACHED*/
   vassert(0);
}
6866
6867 /* Move 16 bits from Ew (ireg or mem) to G (a segment register). */
6868
6869 static
dis_mov_Ew_Sw(UChar sorb,Int delta0)6870 UInt dis_mov_Ew_Sw ( UChar sorb, Int delta0 )
6871 {
6872 Int len;
6873 IRTemp addr;
6874 UChar rm = getIByte(delta0);
6875 HChar dis_buf[50];
6876
6877 if (epartIsReg(rm)) {
6878 putSReg( gregOfRM(rm), getIReg(2, eregOfRM(rm)) );
6879 DIP("movw %s,%s\n", nameIReg(2,eregOfRM(rm)), nameSReg(gregOfRM(rm)));
6880 return 1+delta0;
6881 } else {
6882 addr = disAMode ( &len, sorb, delta0, dis_buf );
6883 putSReg( gregOfRM(rm), loadLE(Ity_I16, mkexpr(addr)) );
6884 DIP("movw %s,%s\n", dis_buf, nameSReg(gregOfRM(rm)));
6885 return len+delta0;
6886 }
6887 }
6888
6889 /* Move 16 bits from G (a segment register) to Ew (ireg or mem). If
6890 dst is ireg and sz==4, zero out top half of it. */
6891
6892 static
dis_mov_Sw_Ew(UChar sorb,Int sz,Int delta0)6893 UInt dis_mov_Sw_Ew ( UChar sorb,
6894 Int sz,
6895 Int delta0 )
6896 {
6897 Int len;
6898 IRTemp addr;
6899 UChar rm = getIByte(delta0);
6900 HChar dis_buf[50];
6901
6902 vassert(sz == 2 || sz == 4);
6903
6904 if (epartIsReg(rm)) {
6905 if (sz == 4)
6906 putIReg(4, eregOfRM(rm), unop(Iop_16Uto32, getSReg(gregOfRM(rm))));
6907 else
6908 putIReg(2, eregOfRM(rm), getSReg(gregOfRM(rm)));
6909
6910 DIP("mov %s,%s\n", nameSReg(gregOfRM(rm)), nameIReg(sz,eregOfRM(rm)));
6911 return 1+delta0;
6912 } else {
6913 addr = disAMode ( &len, sorb, delta0, dis_buf );
6914 storeLE( mkexpr(addr), getSReg(gregOfRM(rm)) );
6915 DIP("mov %s,%s\n", nameSReg(gregOfRM(rm)), dis_buf);
6916 return len+delta0;
6917 }
6918 }
6919
6920
6921 static
dis_push_segreg(UInt sreg,Int sz)6922 void dis_push_segreg ( UInt sreg, Int sz )
6923 {
6924 IRTemp t1 = newTemp(Ity_I16);
6925 IRTemp ta = newTemp(Ity_I32);
6926 vassert(sz == 2 || sz == 4);
6927
6928 assign( t1, getSReg(sreg) );
6929 assign( ta, binop(Iop_Sub32, getIReg(4, R_ESP), mkU32(sz)) );
6930 putIReg(4, R_ESP, mkexpr(ta));
6931 storeLE( mkexpr(ta), mkexpr(t1) );
6932
6933 DIP("push%c %s\n", sz==2 ? 'w' : 'l', nameSReg(sreg));
6934 }
6935
6936 static
dis_pop_segreg(UInt sreg,Int sz)6937 void dis_pop_segreg ( UInt sreg, Int sz )
6938 {
6939 IRTemp t1 = newTemp(Ity_I16);
6940 IRTemp ta = newTemp(Ity_I32);
6941 vassert(sz == 2 || sz == 4);
6942
6943 assign( ta, getIReg(4, R_ESP) );
6944 assign( t1, loadLE(Ity_I16, mkexpr(ta)) );
6945
6946 putIReg(4, R_ESP, binop(Iop_Add32, mkexpr(ta), mkU32(sz)) );
6947 putSReg( sreg, mkexpr(t1) );
6948 DIP("pop%c %s\n", sz==2 ? 'w' : 'l', nameSReg(sreg));
6949 }
6950
6951 static
dis_ret(DisResult * dres,UInt d32)6952 void dis_ret ( /*MOD*/DisResult* dres, UInt d32 )
6953 {
6954 IRTemp t1 = newTemp(Ity_I32);
6955 IRTemp t2 = newTemp(Ity_I32);
6956 assign(t1, getIReg(4,R_ESP));
6957 assign(t2, loadLE(Ity_I32,mkexpr(t1)));
6958 putIReg(4, R_ESP,binop(Iop_Add32, mkexpr(t1), mkU32(4+d32)));
6959 jmp_treg(dres, Ijk_Ret, t2);
6960 vassert(dres->whatNext == Dis_StopHere);
6961 }
6962
6963 /*------------------------------------------------------------*/
6964 /*--- SSE/SSE2/SSE3 helpers ---*/
6965 /*------------------------------------------------------------*/
6966
6967 /* Indicates whether the op requires a rounding-mode argument. Note
6968 that this covers only vector floating point arithmetic ops, and
6969 omits the scalar ones that need rounding modes. Note also that
6970 inconsistencies here will get picked up later by the IR sanity
6971 checker, so this isn't correctness-critical. */
requiresRMode(IROp op)6972 static Bool requiresRMode ( IROp op )
6973 {
6974 switch (op) {
6975 /* 128 bit ops */
6976 case Iop_Add32Fx4: case Iop_Sub32Fx4:
6977 case Iop_Mul32Fx4: case Iop_Div32Fx4:
6978 case Iop_Add64Fx2: case Iop_Sub64Fx2:
6979 case Iop_Mul64Fx2: case Iop_Div64Fx2:
6980 return True;
6981 default:
6982 break;
6983 }
6984 return False;
6985 }
6986
6987
/* Worker function; do not call directly.
   Handles full width G = G `op` E   and   G = (not G) `op` E.
   `op` may be a binop or, if requiresRMode(op), a triop taking a
   rounding-mode first argument (currently faked -- see XXX below).
   Returns the updated delta. */

static UInt dis_SSE_E_to_G_all_wrk (
               UChar sorb, Int delta,
               const HChar* opname, IROp op,
               Bool   invertG
            )
{
   HChar   dis_buf[50];
   Int     alen;
   IRTemp  addr;
   UChar   rm = getIByte(delta);
   /* gpart is G, optionally complemented (for the ANDN-style cases). */
   IRExpr* gpart
      = invertG ? unop(Iop_NotV128, getXMMReg(gregOfRM(rm)))
                : getXMMReg(gregOfRM(rm));
   if (epartIsReg(rm)) {
      putXMMReg(
         gregOfRM(rm),
         requiresRMode(op)
            ? triop(op, get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
                        gpart,
                        getXMMReg(eregOfRM(rm)))
            : binop(op, gpart,
                        getXMMReg(eregOfRM(rm)))
      );
      DIP("%s %s,%s\n", opname,
                        nameXMMReg(eregOfRM(rm)),
                        nameXMMReg(gregOfRM(rm)) );
      return delta+1;
   } else {
      addr = disAMode ( &alen, sorb, delta, dis_buf );
      putXMMReg(
         gregOfRM(rm),
         requiresRMode(op)
            ? triop(op, get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
                        gpart,
                        loadLE(Ity_V128, mkexpr(addr)))
            : binop(op, gpart,
                        loadLE(Ity_V128, mkexpr(addr)))
      );
      DIP("%s %s,%s\n", opname,
                        dis_buf,
                        nameXMMReg(gregOfRM(rm)) );
      return delta+alen;
   }
}
7036
7037
7038 /* All lanes SSE binary operation, G = G `op` E. */
7039
7040 static
dis_SSE_E_to_G_all(UChar sorb,Int delta,const HChar * opname,IROp op)7041 UInt dis_SSE_E_to_G_all ( UChar sorb, Int delta, const HChar* opname, IROp op )
7042 {
7043 return dis_SSE_E_to_G_all_wrk( sorb, delta, opname, op, False );
7044 }
7045
7046 /* All lanes SSE binary operation, G = (not G) `op` E. */
7047
7048 static
dis_SSE_E_to_G_all_invG(UChar sorb,Int delta,const HChar * opname,IROp op)7049 UInt dis_SSE_E_to_G_all_invG ( UChar sorb, Int delta,
7050 const HChar* opname, IROp op )
7051 {
7052 return dis_SSE_E_to_G_all_wrk( sorb, delta, opname, op, True );
7053 }
7054
7055
/* Lowest 32-bit lane only SSE binary operation, G = G `op` E.
   `op` is expected to be a lane-0-only op (the F0x4 family), so the
   upper lanes of the E operand are irrelevant; for the memory form
   they are simply zeroed.  Returns the updated delta. */

static UInt dis_SSE_E_to_G_lo32 ( UChar sorb, Int delta,
                                  const HChar* opname, IROp op )
{
   HChar   dis_buf[50];
   Int     alen;
   IRTemp  addr;
   UChar   rm = getIByte(delta);
   IRExpr* gpart = getXMMReg(gregOfRM(rm));
   if (epartIsReg(rm)) {
      putXMMReg( gregOfRM(rm),
                 binop(op, gpart,
                           getXMMReg(eregOfRM(rm))) );
      DIP("%s %s,%s\n", opname,
                        nameXMMReg(eregOfRM(rm)),
                        nameXMMReg(gregOfRM(rm)) );
      return delta+1;
   } else {
      /* We can only do a 32-bit memory read, so the upper 3/4 of the
         E operand needs to be made simply of zeroes. */
      IRTemp epart = newTemp(Ity_V128);
      addr = disAMode ( &alen, sorb, delta, dis_buf );
      assign( epart, unop( Iop_32UtoV128,
                           loadLE(Ity_I32, mkexpr(addr))) );
      putXMMReg( gregOfRM(rm),
                 binop(op, gpart, mkexpr(epart)) );
      DIP("%s %s,%s\n", opname,
                        dis_buf,
                        nameXMMReg(gregOfRM(rm)) );
      return delta+alen;
   }
}
7089
7090
/* Lower 64-bit lane only SSE binary operation, G = G `op` E.
   Analogous to dis_SSE_E_to_G_lo32, but for lane-0-only 64-bit ops
   (the F0x2 family).  For the memory form, only 64 bits are read and
   the upper half of the E operand is zeroed.  Returns the updated
   delta. */

static UInt dis_SSE_E_to_G_lo64 ( UChar sorb, Int delta,
                                  const HChar* opname, IROp op )
{
   HChar   dis_buf[50];
   Int     alen;
   IRTemp  addr;
   UChar   rm = getIByte(delta);
   IRExpr* gpart = getXMMReg(gregOfRM(rm));
   if (epartIsReg(rm)) {
      putXMMReg( gregOfRM(rm),
                 binop(op, gpart,
                           getXMMReg(eregOfRM(rm))) );
      DIP("%s %s,%s\n", opname,
                        nameXMMReg(eregOfRM(rm)),
                        nameXMMReg(gregOfRM(rm)) );
      return delta+1;
   } else {
      /* We can only do a 64-bit memory read, so the upper half of the
         E operand needs to be made simply of zeroes. */
      IRTemp epart = newTemp(Ity_V128);
      addr = disAMode ( &alen, sorb, delta, dis_buf );
      assign( epart, unop( Iop_64UtoV128,
                           loadLE(Ity_I64, mkexpr(addr))) );
      putXMMReg( gregOfRM(rm),
                 binop(op, gpart, mkexpr(epart)) );
      DIP("%s %s,%s\n", opname,
                        dis_buf,
                        nameXMMReg(gregOfRM(rm)) );
      return delta+alen;
   }
}
7124
7125
7126 /* All lanes unary SSE operation, G = op(E). */
7127
dis_SSE_E_to_G_unary_all(UChar sorb,Int delta,const HChar * opname,IROp op)7128 static UInt dis_SSE_E_to_G_unary_all (
7129 UChar sorb, Int delta,
7130 const HChar* opname, IROp op
7131 )
7132 {
7133 HChar dis_buf[50];
7134 Int alen;
7135 IRTemp addr;
7136 UChar rm = getIByte(delta);
7137 if (epartIsReg(rm)) {
7138 putXMMReg( gregOfRM(rm),
7139 unop(op, getXMMReg(eregOfRM(rm))) );
7140 DIP("%s %s,%s\n", opname,
7141 nameXMMReg(eregOfRM(rm)),
7142 nameXMMReg(gregOfRM(rm)) );
7143 return delta+1;
7144 } else {
7145 addr = disAMode ( &alen, sorb, delta, dis_buf );
7146 putXMMReg( gregOfRM(rm),
7147 unop(op, loadLE(Ity_V128, mkexpr(addr))) );
7148 DIP("%s %s,%s\n", opname,
7149 dis_buf,
7150 nameXMMReg(gregOfRM(rm)) );
7151 return delta+alen;
7152 }
7153 }
7154
7155
/* Lowest 32-bit lane only unary SSE operation, G = op(E).
   The lane-0-only ops operate on a whole V128 value, so the low 32
   bits of E are first patched into the current value of G, and op is
   applied to that.  Returns the updated delta. */

static UInt dis_SSE_E_to_G_unary_lo32 (
               UChar sorb, Int delta,
               const HChar* opname, IROp op
            )
{
   /* First we need to get the old G value and patch the low 32 bits
      of the E operand into it.  Then apply op and write back to G. */
   HChar  dis_buf[50];
   Int    alen;
   IRTemp addr;
   UChar  rm = getIByte(delta);
   IRTemp oldG0 = newTemp(Ity_V128);  /* current value of G */
   IRTemp oldG1 = newTemp(Ity_V128);  /* G with E's low lane spliced in */

   assign( oldG0, getXMMReg(gregOfRM(rm)) );

   if (epartIsReg(rm)) {
      assign( oldG1,
              binop( Iop_SetV128lo32,
                     mkexpr(oldG0),
                     getXMMRegLane32(eregOfRM(rm), 0)) );
      putXMMReg( gregOfRM(rm), unop(op, mkexpr(oldG1)) );
      DIP("%s %s,%s\n", opname,
                        nameXMMReg(eregOfRM(rm)),
                        nameXMMReg(gregOfRM(rm)) );
      return delta+1;
   } else {
      addr = disAMode ( &alen, sorb, delta, dis_buf );
      assign( oldG1,
              binop( Iop_SetV128lo32,
                     mkexpr(oldG0),
                     loadLE(Ity_I32, mkexpr(addr)) ));
      putXMMReg( gregOfRM(rm), unop(op, mkexpr(oldG1)) );
      DIP("%s %s,%s\n", opname,
                        dis_buf,
                        nameXMMReg(gregOfRM(rm)) );
      return delta+alen;
   }
}
7197
7198
/* Lowest 64-bit lane only unary SSE operation, G = op(E).
   As for the lo32 variant above, but splicing E's low 64 bits into
   G before applying op.  Returns the updated delta. */

static UInt dis_SSE_E_to_G_unary_lo64 (
               UChar sorb, Int delta,
               const HChar* opname, IROp op
            )
{
   /* First we need to get the old G value and patch the low 64 bits
      of the E operand into it.  Then apply op and write back to G. */
   HChar  dis_buf[50];
   Int    alen;
   IRTemp addr;
   UChar  rm = getIByte(delta);
   IRTemp oldG0 = newTemp(Ity_V128);  /* current value of G */
   IRTemp oldG1 = newTemp(Ity_V128);  /* G with E's low lane spliced in */

   assign( oldG0, getXMMReg(gregOfRM(rm)) );

   if (epartIsReg(rm)) {
      assign( oldG1,
              binop( Iop_SetV128lo64,
                     mkexpr(oldG0),
                     getXMMRegLane64(eregOfRM(rm), 0)) );
      putXMMReg( gregOfRM(rm), unop(op, mkexpr(oldG1)) );
      DIP("%s %s,%s\n", opname,
                        nameXMMReg(eregOfRM(rm)),
                        nameXMMReg(gregOfRM(rm)) );
      return delta+1;
   } else {
      addr = disAMode ( &alen, sorb, delta, dis_buf );
      assign( oldG1,
              binop( Iop_SetV128lo64,
                     mkexpr(oldG0),
                     loadLE(Ity_I64, mkexpr(addr)) ));
      putXMMReg( gregOfRM(rm), unop(op, mkexpr(oldG1)) );
      DIP("%s %s,%s\n", opname,
                        dis_buf,
                        nameXMMReg(gregOfRM(rm)) );
      return delta+alen;
   }
}
7240
7241
/* SSE integer binary operation:
      G = G `op` E   (eLeft == False)
      G = E `op` G   (eLeft == True)
   eLeft matters for non-commutative ops (e.g. subtracts and shifts).
   Returns the updated delta. */
static UInt dis_SSEint_E_to_G(
               UChar sorb, Int delta,
               const HChar* opname, IROp op,
               Bool   eLeft
            )
{
   HChar   dis_buf[50];
   Int     alen;
   IRTemp  addr;
   UChar   rm = getIByte(delta);
   IRExpr* gpart = getXMMReg(gregOfRM(rm));
   IRExpr* epart = NULL;
   if (epartIsReg(rm)) {
      epart = getXMMReg(eregOfRM(rm));
      DIP("%s %s,%s\n", opname,
                        nameXMMReg(eregOfRM(rm)),
                        nameXMMReg(gregOfRM(rm)) );
      delta += 1;
   } else {
      addr  = disAMode ( &alen, sorb, delta, dis_buf );
      epart = loadLE(Ity_V128, mkexpr(addr));
      DIP("%s %s,%s\n", opname,
                        dis_buf,
                        nameXMMReg(gregOfRM(rm)) );
      delta += alen;
   }
   /* Operand order is selected here; the result always lands in G. */
   putXMMReg( gregOfRM(rm),
              eLeft ? binop(op, epart, gpart)
                    : binop(op, gpart, epart) );
   return delta;
}
7277
7278
7279 /* Helper for doing SSE FP comparisons. */
7280
findSSECmpOp(Bool * needNot,IROp * op,Int imm8,Bool all_lanes,Int sz)7281 static void findSSECmpOp ( Bool* needNot, IROp* op,
7282 Int imm8, Bool all_lanes, Int sz )
7283 {
7284 imm8 &= 7;
7285 *needNot = False;
7286 *op = Iop_INVALID;
7287 if (imm8 >= 4) {
7288 *needNot = True;
7289 imm8 -= 4;
7290 }
7291
7292 if (sz == 4 && all_lanes) {
7293 switch (imm8) {
7294 case 0: *op = Iop_CmpEQ32Fx4; return;
7295 case 1: *op = Iop_CmpLT32Fx4; return;
7296 case 2: *op = Iop_CmpLE32Fx4; return;
7297 case 3: *op = Iop_CmpUN32Fx4; return;
7298 default: break;
7299 }
7300 }
7301 if (sz == 4 && !all_lanes) {
7302 switch (imm8) {
7303 case 0: *op = Iop_CmpEQ32F0x4; return;
7304 case 1: *op = Iop_CmpLT32F0x4; return;
7305 case 2: *op = Iop_CmpLE32F0x4; return;
7306 case 3: *op = Iop_CmpUN32F0x4; return;
7307 default: break;
7308 }
7309 }
7310 if (sz == 8 && all_lanes) {
7311 switch (imm8) {
7312 case 0: *op = Iop_CmpEQ64Fx2; return;
7313 case 1: *op = Iop_CmpLT64Fx2; return;
7314 case 2: *op = Iop_CmpLE64Fx2; return;
7315 case 3: *op = Iop_CmpUN64Fx2; return;
7316 default: break;
7317 }
7318 }
7319 if (sz == 8 && !all_lanes) {
7320 switch (imm8) {
7321 case 0: *op = Iop_CmpEQ64F0x2; return;
7322 case 1: *op = Iop_CmpLT64F0x2; return;
7323 case 2: *op = Iop_CmpLE64F0x2; return;
7324 case 3: *op = Iop_CmpUN64F0x2; return;
7325 default: break;
7326 }
7327 }
7328 vpanic("findSSECmpOp(x86,guest)");
7329 }
7330
/* Handles SSE 32F/64F comparisons.  Decodes CMPPS/CMPSS/CMPPD/CMPSD
   style insns: the trailing imm8 selects the predicate (see
   findSSECmpOp), `all_lanes` distinguishes packed from scalar
   (lowest-lane-only) forms, and `sz` is the lane size in bytes
   (4 or 8).  Returns the updated instruction delta. */

static UInt dis_SSEcmp_E_to_G ( UChar sorb, Int delta, 
                                const HChar* opname, Bool all_lanes, Int sz )
{
   HChar dis_buf[50];
   Int alen, imm8;
   IRTemp addr;
   Bool needNot = False;
   IROp op = Iop_INVALID;
   IRTemp plain = newTemp(Ity_V128);
   UChar rm = getIByte(delta);
   UShort mask = 0;
   vassert(sz == 4 || sz == 8);
   if (epartIsReg(rm)) {
      /* Register form: imm8 follows the modrm byte. */
      imm8 = getIByte(delta+1);
      findSSECmpOp(&needNot, &op, imm8, all_lanes, sz);
      assign( plain, binop(op, getXMMReg(gregOfRM(rm)), 
                               getXMMReg(eregOfRM(rm))) );
      delta += 2;
      DIP("%s $%d,%s,%s\n", opname,
                            (Int)imm8,
                            nameXMMReg(eregOfRM(rm)),
                            nameXMMReg(gregOfRM(rm)) );
   } else {
      /* Memory form: imm8 follows the addressing bytes.  For the
         scalar (lo-lane) variants, load only the operand-sized chunk
         and zero-extend it into a V128; the upper lanes of the E
         operand are not used by the F0x4/F0x2 compare ops. */
      addr = disAMode ( &alen, sorb, delta, dis_buf );
      imm8 = getIByte(delta+alen);
      findSSECmpOp(&needNot, &op, imm8, all_lanes, sz);
      assign( plain, 
              binop(
                 op,
                 getXMMReg(gregOfRM(rm)), 
                   all_lanes  ? loadLE(Ity_V128, mkexpr(addr))
                 : sz == 8    ? unop( Iop_64UtoV128, loadLE(Ity_I64, mkexpr(addr)))
                 : /*sz==4*/    unop( Iop_32UtoV128, loadLE(Ity_I32, mkexpr(addr)))
	      ) 
      );
      delta += alen+1;
      DIP("%s $%d,%s,%s\n", opname,
                            (Int)imm8,
                            dis_buf,
                            nameXMMReg(gregOfRM(rm)) );
   }

   /* Predicates 4..7 are negations of 0..3 (needNot set by
      findSSECmpOp); invert the raw compare result where needed. */
   if (needNot && all_lanes) {
      putXMMReg( gregOfRM(rm), 
                 unop(Iop_NotV128, mkexpr(plain)) );
   }
   else
   if (needNot && !all_lanes) {
      /* Scalar form: only invert the lane the compare actually
         wrote.  mkV128 expands each mask bit to one byte, so 0x000F
         flips the low 32 bits and 0x00FF the low 64 bits. */
      mask = toUShort( sz==4 ? 0x000F : 0x00FF );
      putXMMReg( gregOfRM(rm), 
                 binop(Iop_XorV128, mkexpr(plain), mkV128(mask)) );
   }
   else {
      putXMMReg( gregOfRM(rm), mkexpr(plain) );
   }

   return delta;
}
7391
7392
7393 /* Vector by scalar shift of G by the amount specified at the bottom
7394 of E. */
7395
dis_SSE_shiftG_byE(UChar sorb,Int delta,const HChar * opname,IROp op)7396 static UInt dis_SSE_shiftG_byE ( UChar sorb, Int delta,
7397 const HChar* opname, IROp op )
7398 {
7399 HChar dis_buf[50];
7400 Int alen, size;
7401 IRTemp addr;
7402 Bool shl, shr, sar;
7403 UChar rm = getIByte(delta);
7404 IRTemp g0 = newTemp(Ity_V128);
7405 IRTemp g1 = newTemp(Ity_V128);
7406 IRTemp amt = newTemp(Ity_I32);
7407 IRTemp amt8 = newTemp(Ity_I8);
7408 if (epartIsReg(rm)) {
7409 assign( amt, getXMMRegLane32(eregOfRM(rm), 0) );
7410 DIP("%s %s,%s\n", opname,
7411 nameXMMReg(eregOfRM(rm)),
7412 nameXMMReg(gregOfRM(rm)) );
7413 delta++;
7414 } else {
7415 addr = disAMode ( &alen, sorb, delta, dis_buf );
7416 assign( amt, loadLE(Ity_I32, mkexpr(addr)) );
7417 DIP("%s %s,%s\n", opname,
7418 dis_buf,
7419 nameXMMReg(gregOfRM(rm)) );
7420 delta += alen;
7421 }
7422 assign( g0, getXMMReg(gregOfRM(rm)) );
7423 assign( amt8, unop(Iop_32to8, mkexpr(amt)) );
7424
7425 shl = shr = sar = False;
7426 size = 0;
7427 switch (op) {
7428 case Iop_ShlN16x8: shl = True; size = 32; break;
7429 case Iop_ShlN32x4: shl = True; size = 32; break;
7430 case Iop_ShlN64x2: shl = True; size = 64; break;
7431 case Iop_SarN16x8: sar = True; size = 16; break;
7432 case Iop_SarN32x4: sar = True; size = 32; break;
7433 case Iop_ShrN16x8: shr = True; size = 16; break;
7434 case Iop_ShrN32x4: shr = True; size = 32; break;
7435 case Iop_ShrN64x2: shr = True; size = 64; break;
7436 default: vassert(0);
7437 }
7438
7439 if (shl || shr) {
7440 assign(
7441 g1,
7442 IRExpr_ITE(
7443 binop(Iop_CmpLT32U,mkexpr(amt),mkU32(size)),
7444 binop(op, mkexpr(g0), mkexpr(amt8)),
7445 mkV128(0x0000)
7446 )
7447 );
7448 } else
7449 if (sar) {
7450 assign(
7451 g1,
7452 IRExpr_ITE(
7453 binop(Iop_CmpLT32U,mkexpr(amt),mkU32(size)),
7454 binop(op, mkexpr(g0), mkexpr(amt8)),
7455 binop(op, mkexpr(g0), mkU8(size-1))
7456 )
7457 );
7458 } else {
7459 /*NOTREACHED*/
7460 vassert(0);
7461 }
7462
7463 putXMMReg( gregOfRM(rm), mkexpr(g1) );
7464 return delta;
7465 }
7466
7467
7468 /* Vector by scalar shift of E by an immediate byte. */
7469
7470 static
dis_SSE_shiftE_imm(Int delta,const HChar * opname,IROp op)7471 UInt dis_SSE_shiftE_imm ( Int delta, const HChar* opname, IROp op )
7472 {
7473 Bool shl, shr, sar;
7474 UChar rm = getIByte(delta);
7475 IRTemp e0 = newTemp(Ity_V128);
7476 IRTemp e1 = newTemp(Ity_V128);
7477 UChar amt, size;
7478 vassert(epartIsReg(rm));
7479 vassert(gregOfRM(rm) == 2
7480 || gregOfRM(rm) == 4 || gregOfRM(rm) == 6);
7481 amt = getIByte(delta+1);
7482 delta += 2;
7483 DIP("%s $%d,%s\n", opname,
7484 (Int)amt,
7485 nameXMMReg(eregOfRM(rm)) );
7486 assign( e0, getXMMReg(eregOfRM(rm)) );
7487
7488 shl = shr = sar = False;
7489 size = 0;
7490 switch (op) {
7491 case Iop_ShlN16x8: shl = True; size = 16; break;
7492 case Iop_ShlN32x4: shl = True; size = 32; break;
7493 case Iop_ShlN64x2: shl = True; size = 64; break;
7494 case Iop_SarN16x8: sar = True; size = 16; break;
7495 case Iop_SarN32x4: sar = True; size = 32; break;
7496 case Iop_ShrN16x8: shr = True; size = 16; break;
7497 case Iop_ShrN32x4: shr = True; size = 32; break;
7498 case Iop_ShrN64x2: shr = True; size = 64; break;
7499 default: vassert(0);
7500 }
7501
7502 if (shl || shr) {
7503 assign( e1, amt >= size
7504 ? mkV128(0x0000)
7505 : binop(op, mkexpr(e0), mkU8(amt))
7506 );
7507 } else
7508 if (sar) {
7509 assign( e1, amt >= size
7510 ? binop(op, mkexpr(e0), mkU8(size-1))
7511 : binop(op, mkexpr(e0), mkU8(amt))
7512 );
7513 } else {
7514 /*NOTREACHED*/
7515 vassert(0);
7516 }
7517
7518 putXMMReg( eregOfRM(rm), mkexpr(e1) );
7519 return delta;
7520 }
7521
7522
7523 /* Get the current SSE rounding mode. */
7524
get_sse_roundingmode(void)7525 static IRExpr* /* :: Ity_I32 */ get_sse_roundingmode ( void )
7526 {
7527 return binop( Iop_And32,
7528 IRExpr_Get( OFFB_SSEROUND, Ity_I32 ),
7529 mkU32(3) );
7530 }
7531
/* Write a new SSE rounding mode (an Ity_I32 expression) into the
   guest state.  The value is stored as supplied; no masking is done
   here. */
static void put_sse_roundingmode ( IRExpr* sseround )
{
   vassert(typeOfIRExpr(irsb->tyenv, sseround) == Ity_I32);
   stmt( IRStmt_Put( OFFB_SSEROUND, sseround ) );
}
7537
7538 /* Break a 128-bit value up into four 32-bit ints. */
7539
breakup128to32s(IRTemp t128,IRTemp * t3,IRTemp * t2,IRTemp * t1,IRTemp * t0)7540 static void breakup128to32s ( IRTemp t128,
7541 /*OUTs*/
7542 IRTemp* t3, IRTemp* t2,
7543 IRTemp* t1, IRTemp* t0 )
7544 {
7545 IRTemp hi64 = newTemp(Ity_I64);
7546 IRTemp lo64 = newTemp(Ity_I64);
7547 assign( hi64, unop(Iop_V128HIto64, mkexpr(t128)) );
7548 assign( lo64, unop(Iop_V128to64, mkexpr(t128)) );
7549
7550 vassert(t0 && *t0 == IRTemp_INVALID);
7551 vassert(t1 && *t1 == IRTemp_INVALID);
7552 vassert(t2 && *t2 == IRTemp_INVALID);
7553 vassert(t3 && *t3 == IRTemp_INVALID);
7554
7555 *t0 = newTemp(Ity_I32);
7556 *t1 = newTemp(Ity_I32);
7557 *t2 = newTemp(Ity_I32);
7558 *t3 = newTemp(Ity_I32);
7559 assign( *t0, unop(Iop_64to32, mkexpr(lo64)) );
7560 assign( *t1, unop(Iop_64HIto32, mkexpr(lo64)) );
7561 assign( *t2, unop(Iop_64to32, mkexpr(hi64)) );
7562 assign( *t3, unop(Iop_64HIto32, mkexpr(hi64)) );
7563 }
7564
7565 /* Construct a 128-bit value from four 32-bit ints. */
7566
mk128from32s(IRTemp t3,IRTemp t2,IRTemp t1,IRTemp t0)7567 static IRExpr* mk128from32s ( IRTemp t3, IRTemp t2,
7568 IRTemp t1, IRTemp t0 )
7569 {
7570 return
7571 binop( Iop_64HLtoV128,
7572 binop(Iop_32HLto64, mkexpr(t3), mkexpr(t2)),
7573 binop(Iop_32HLto64, mkexpr(t1), mkexpr(t0))
7574 );
7575 }
7576
7577 /* Break a 64-bit value up into four 16-bit ints. */
7578
breakup64to16s(IRTemp t64,IRTemp * t3,IRTemp * t2,IRTemp * t1,IRTemp * t0)7579 static void breakup64to16s ( IRTemp t64,
7580 /*OUTs*/
7581 IRTemp* t3, IRTemp* t2,
7582 IRTemp* t1, IRTemp* t0 )
7583 {
7584 IRTemp hi32 = newTemp(Ity_I32);
7585 IRTemp lo32 = newTemp(Ity_I32);
7586 assign( hi32, unop(Iop_64HIto32, mkexpr(t64)) );
7587 assign( lo32, unop(Iop_64to32, mkexpr(t64)) );
7588
7589 vassert(t0 && *t0 == IRTemp_INVALID);
7590 vassert(t1 && *t1 == IRTemp_INVALID);
7591 vassert(t2 && *t2 == IRTemp_INVALID);
7592 vassert(t3 && *t3 == IRTemp_INVALID);
7593
7594 *t0 = newTemp(Ity_I16);
7595 *t1 = newTemp(Ity_I16);
7596 *t2 = newTemp(Ity_I16);
7597 *t3 = newTemp(Ity_I16);
7598 assign( *t0, unop(Iop_32to16, mkexpr(lo32)) );
7599 assign( *t1, unop(Iop_32HIto16, mkexpr(lo32)) );
7600 assign( *t2, unop(Iop_32to16, mkexpr(hi32)) );
7601 assign( *t3, unop(Iop_32HIto16, mkexpr(hi32)) );
7602 }
7603
7604 /* Construct a 64-bit value from four 16-bit ints. */
7605
mk64from16s(IRTemp t3,IRTemp t2,IRTemp t1,IRTemp t0)7606 static IRExpr* mk64from16s ( IRTemp t3, IRTemp t2,
7607 IRTemp t1, IRTemp t0 )
7608 {
7609 return
7610 binop( Iop_32HLto64,
7611 binop(Iop_16HLto32, mkexpr(t3), mkexpr(t2)),
7612 binop(Iop_16HLto32, mkexpr(t1), mkexpr(t0))
7613 );
7614 }
7615
/* Generate IR to set the guest %EFLAGS from the pushfl-format image
   in the given 32-bit temporary.  The flags that are set are: O S Z A
   C P D ID AC.

   In all cases, code to set AC is generated.  However, VEX actually
   ignores the AC value and so can optionally emit an emulation
   warning when it is enabled.  In this routine, an emulation warning
   is only emitted if emit_AC_emwarn is True, in which case
   next_insn_EIP must be correct (this allows for correct code
   generation for popfl/popfw).  If emit_AC_emwarn is False,
   next_insn_EIP is unimportant (this allows for easy if kludgey code
   generation for IRET.) */

static 
void set_EFLAGS_from_value ( IRTemp t1, 
                             Bool   emit_AC_emwarn,
                             Addr32 next_insn_EIP )
{
   vassert(typeOfIRTemp(irsb->tyenv,t1) == Ity_I32);

   /* t1 is the flag word.  Mask out everything except OSZACP and set
      the flags thunk to X86G_CC_OP_COPY. */
   stmt( IRStmt_Put( OFFB_CC_OP,   mkU32(X86G_CC_OP_COPY) ));
   stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) ));
   stmt( IRStmt_Put( OFFB_CC_DEP1, 
                     binop(Iop_And32,
                           mkexpr(t1), 
                           mkU32( X86G_CC_MASK_C | X86G_CC_MASK_P 
                                  | X86G_CC_MASK_A | X86G_CC_MASK_Z 
                                  | X86G_CC_MASK_S| X86G_CC_MASK_O )
                          )
                    )
       );
   /* Set NDEP even though it isn't used.  This makes redundant-PUT
      elimination of previous stores to this field work better. */
   stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));

   /* Also need to set the D flag, which is held in bit 10 of t1.
      If zero, put 1 in OFFB_DFLAG, else -1 in OFFB_DFLAG. */
   stmt( IRStmt_Put( 
            OFFB_DFLAG,
            IRExpr_ITE( 
               unop(Iop_32to1,
                    binop(Iop_And32, 
                          binop(Iop_Shr32, mkexpr(t1), mkU8(10)), 
                          mkU32(1))),
               mkU32(0xFFFFFFFF),
               mkU32(1)))
       );

   /* Set the ID flag, held in bit 21 of t1. */
   stmt( IRStmt_Put( 
            OFFB_IDFLAG,
            IRExpr_ITE( 
               unop(Iop_32to1,
                    binop(Iop_And32, 
                          binop(Iop_Shr32, mkexpr(t1), mkU8(21)), 
                          mkU32(1))),
               mkU32(1),
               mkU32(0)))
       );

   /* And set the AC flag, held in bit 18 of t1.  If setting it to 1,
      possibly emit an emulation warning. */
   stmt( IRStmt_Put( 
            OFFB_ACFLAG,
            IRExpr_ITE( 
               unop(Iop_32to1,
                    binop(Iop_And32, 
                          binop(Iop_Shr32, mkexpr(t1), mkU8(18)), 
                          mkU32(1))),
               mkU32(1),
               mkU32(0)))
       );

   if (emit_AC_emwarn) {
      /* Warn, and exit to the scheduler, only when AC (bit 18) is
         actually set in the incoming flag image. */
      put_emwarn( mkU32(EmWarn_X86_acFlag) );
      stmt( 
         IRStmt_Exit(
            binop( Iop_CmpNE32, 
                   binop(Iop_And32, mkexpr(t1), mkU32(1<<18)), 
                   mkU32(0) ),
            Ijk_EmWarn,
            IRConst_U32( next_insn_EIP ),
            OFFB_EIP
         )
      );
   }
}
7705
7706
/* Helper for the SSSE3 (not SSE3) PMULHRSW insns.  Given two 64-bit
   values (aa,bb), computes, for each of the 4 16-bit lanes:

   (((aa_lane *s32 bb_lane) >>u 14) + 1) >>u 1
*/
static IRExpr* dis_PMULHRSW_helper ( IRExpr* aax, IRExpr* bbx )
{
   IRTemp aa      = newTemp(Ity_I64);
   IRTemp bb      = newTemp(Ity_I64);
   IRTemp aahi32s = newTemp(Ity_I64);   /* aa lanes 3,2 sign-extended to I32 */
   IRTemp aalo32s = newTemp(Ity_I64);   /* aa lanes 1,0 sign-extended to I32 */
   IRTemp bbhi32s = newTemp(Ity_I64);
   IRTemp bblo32s = newTemp(Ity_I64);
   IRTemp rHi     = newTemp(Ity_I64);
   IRTemp rLo     = newTemp(Ity_I64);
   IRTemp one32x2 = newTemp(Ity_I64);
   assign(aa, aax);
   assign(bb, bbx);
   /* Sign-extend each 16-bit lane to 32 bits: interleave the value
      with itself, so each 32-bit slot holds the lane duplicated in
      both halves, then arithmetic-shift each slot right by 16. */
   assign( aahi32s,
           binop(Iop_SarN32x2,
                 binop(Iop_InterleaveHI16x4, mkexpr(aa), mkexpr(aa)),
                 mkU8(16) ));
   assign( aalo32s,
           binop(Iop_SarN32x2,
                 binop(Iop_InterleaveLO16x4, mkexpr(aa), mkexpr(aa)),
                 mkU8(16) ));
   assign( bbhi32s,
           binop(Iop_SarN32x2,
                 binop(Iop_InterleaveHI16x4, mkexpr(bb), mkexpr(bb)),
                 mkU8(16) ));
   assign( bblo32s,
           binop(Iop_SarN32x2,
                 binop(Iop_InterleaveLO16x4, mkexpr(bb), mkexpr(bb)),
                 mkU8(16) ));
   /* Constant 1 in each 32-bit lane, for the rounding step. */
   assign(one32x2, mkU64( (1ULL << 32) + 1 ));
   /* rHi = ((product >>u 14) + 1) >>u 1, for the two upper lanes. */
   assign(
      rHi,
      binop(
         Iop_ShrN32x2,
         binop(
            Iop_Add32x2, 
            binop(
               Iop_ShrN32x2,
               binop(Iop_Mul32x2, mkexpr(aahi32s), mkexpr(bbhi32s)),
               mkU8(14)
            ),
            mkexpr(one32x2)
         ),
         mkU8(1)
      )
   );
   /* rLo: same computation for the two lower lanes. */
   assign(
      rLo,
      binop(
         Iop_ShrN32x2,
         binop(
            Iop_Add32x2, 
            binop(
               Iop_ShrN32x2,
               binop(Iop_Mul32x2, mkexpr(aalo32s), mkexpr(bblo32s)),
               mkU8(14)
            ),
            mkexpr(one32x2)
         ),
         mkU8(1)
      )
   );
   /* Keep the low 16 bits of each 32-bit result and repack into four
      16-bit lanes. */
   return
      binop(Iop_CatEvenLanes16x4, mkexpr(rHi), mkexpr(rLo));
}
7777
7778 /* Helper for the SSSE3 (not SSE3) PSIGN{B,W,D} insns. Given two 64-bit
7779 values (aa,bb), computes, for each lane:
7780
7781 if aa_lane < 0 then - bb_lane
7782 else if aa_lane > 0 then bb_lane
7783 else 0
7784 */
dis_PSIGN_helper(IRExpr * aax,IRExpr * bbx,Int laneszB)7785 static IRExpr* dis_PSIGN_helper ( IRExpr* aax, IRExpr* bbx, Int laneszB )
7786 {
7787 IRTemp aa = newTemp(Ity_I64);
7788 IRTemp bb = newTemp(Ity_I64);
7789 IRTemp zero = newTemp(Ity_I64);
7790 IRTemp bbNeg = newTemp(Ity_I64);
7791 IRTemp negMask = newTemp(Ity_I64);
7792 IRTemp posMask = newTemp(Ity_I64);
7793 IROp opSub = Iop_INVALID;
7794 IROp opCmpGTS = Iop_INVALID;
7795
7796 switch (laneszB) {
7797 case 1: opSub = Iop_Sub8x8; opCmpGTS = Iop_CmpGT8Sx8; break;
7798 case 2: opSub = Iop_Sub16x4; opCmpGTS = Iop_CmpGT16Sx4; break;
7799 case 4: opSub = Iop_Sub32x2; opCmpGTS = Iop_CmpGT32Sx2; break;
7800 default: vassert(0);
7801 }
7802
7803 assign( aa, aax );
7804 assign( bb, bbx );
7805 assign( zero, mkU64(0) );
7806 assign( bbNeg, binop(opSub, mkexpr(zero), mkexpr(bb)) );
7807 assign( negMask, binop(opCmpGTS, mkexpr(zero), mkexpr(aa)) );
7808 assign( posMask, binop(opCmpGTS, mkexpr(aa), mkexpr(zero)) );
7809
7810 return
7811 binop(Iop_Or64,
7812 binop(Iop_And64, mkexpr(bb), mkexpr(posMask)),
7813 binop(Iop_And64, mkexpr(bbNeg), mkexpr(negMask)) );
7814
7815 }
7816
7817 /* Helper for the SSSE3 (not SSE3) PABS{B,W,D} insns. Given a 64-bit
7818 value aa, computes, for each lane
7819
7820 if aa < 0 then -aa else aa
7821
7822 Note that the result is interpreted as unsigned, so that the
7823 absolute value of the most negative signed input can be
7824 represented.
7825 */
dis_PABS_helper(IRExpr * aax,Int laneszB)7826 static IRExpr* dis_PABS_helper ( IRExpr* aax, Int laneszB )
7827 {
7828 IRTemp aa = newTemp(Ity_I64);
7829 IRTemp zero = newTemp(Ity_I64);
7830 IRTemp aaNeg = newTemp(Ity_I64);
7831 IRTemp negMask = newTemp(Ity_I64);
7832 IRTemp posMask = newTemp(Ity_I64);
7833 IROp opSub = Iop_INVALID;
7834 IROp opSarN = Iop_INVALID;
7835
7836 switch (laneszB) {
7837 case 1: opSub = Iop_Sub8x8; opSarN = Iop_SarN8x8; break;
7838 case 2: opSub = Iop_Sub16x4; opSarN = Iop_SarN16x4; break;
7839 case 4: opSub = Iop_Sub32x2; opSarN = Iop_SarN32x2; break;
7840 default: vassert(0);
7841 }
7842
7843 assign( aa, aax );
7844 assign( negMask, binop(opSarN, mkexpr(aa), mkU8(8*laneszB-1)) );
7845 assign( posMask, unop(Iop_Not64, mkexpr(negMask)) );
7846 assign( zero, mkU64(0) );
7847 assign( aaNeg, binop(opSub, mkexpr(zero), mkexpr(aa)) );
7848 return
7849 binop(Iop_Or64,
7850 binop(Iop_And64, mkexpr(aa), mkexpr(posMask)),
7851 binop(Iop_And64, mkexpr(aaNeg), mkexpr(negMask)) );
7852 }
7853
dis_PALIGNR_XMM_helper(IRTemp hi64,IRTemp lo64,Int byteShift)7854 static IRExpr* dis_PALIGNR_XMM_helper ( IRTemp hi64,
7855 IRTemp lo64, Int byteShift )
7856 {
7857 vassert(byteShift >= 1 && byteShift <= 7);
7858 return
7859 binop(Iop_Or64,
7860 binop(Iop_Shl64, mkexpr(hi64), mkU8(8*(8-byteShift))),
7861 binop(Iop_Shr64, mkexpr(lo64), mkU8(8*byteShift))
7862 );
7863 }
7864
7865 /* Generate a SIGSEGV followed by a restart of the current instruction
7866 if effective_addr is not 16-aligned. This is required behaviour
7867 for some SSE3 instructions and all 128-bit SSSE3 instructions.
7868 This assumes that guest_RIP_curr_instr is set correctly! */
gen_SEGV_if_not_16_aligned(IRTemp effective_addr)7869 static void gen_SEGV_if_not_16_aligned ( IRTemp effective_addr )
7870 {
7871 stmt(
7872 IRStmt_Exit(
7873 binop(Iop_CmpNE32,
7874 binop(Iop_And32,mkexpr(effective_addr),mkU32(0xF)),
7875 mkU32(0)),
7876 Ijk_SigSEGV,
7877 IRConst_U32(guest_EIP_curr_instr),
7878 OFFB_EIP
7879 )
7880 );
7881 }
7882
7883
7884 /* Helper for deciding whether a given insn (starting at the opcode
7885 byte) may validly be used with a LOCK prefix. The following insns
7886 may be used with LOCK when their destination operand is in memory.
7887 AFAICS this is exactly the same for both 32-bit and 64-bit mode.
7888
7889 ADD 80 /0, 81 /0, 82 /0, 83 /0, 00, 01
7890 OR 80 /1, 81 /1, 82 /x, 83 /1, 08, 09
7891 ADC 80 /2, 81 /2, 82 /2, 83 /2, 10, 11
   SBB 80 /3, 81 /3, 82 /x, 83 /3, 18, 19
7893 AND 80 /4, 81 /4, 82 /x, 83 /4, 20, 21
7894 SUB 80 /5, 81 /5, 82 /x, 83 /5, 28, 29
7895 XOR 80 /6, 81 /6, 82 /x, 83 /6, 30, 31
7896
7897 DEC FE /1, FF /1
7898 INC FE /0, FF /0
7899
7900 NEG F6 /3, F7 /3
7901 NOT F6 /2, F7 /2
7902
7903 XCHG 86, 87
7904
7905 BTC 0F BB, 0F BA /7
7906 BTR 0F B3, 0F BA /6
7907 BTS 0F AB, 0F BA /5
7908
7909 CMPXCHG 0F B0, 0F B1
7910 CMPXCHG8B 0F C7 /1
7911
7912 XADD 0F C0, 0F C1
7913
7914 ------------------------------
7915
7916 80 /0 = addb $imm8, rm8
7917 81 /0 = addl $imm32, rm32 and addw $imm16, rm16
7918 82 /0 = addb $imm8, rm8
7919 83 /0 = addl $simm8, rm32 and addw $simm8, rm16
7920
7921 00 = addb r8, rm8
7922 01 = addl r32, rm32 and addw r16, rm16
7923
7924 Same for ADD OR ADC SBB AND SUB XOR
7925
7926 FE /1 = dec rm8
7927 FF /1 = dec rm32 and dec rm16
7928
7929 FE /0 = inc rm8
7930 FF /0 = inc rm32 and inc rm16
7931
7932 F6 /3 = neg rm8
7933 F7 /3 = neg rm32 and neg rm16
7934
7935 F6 /2 = not rm8
7936 F7 /2 = not rm32 and not rm16
7937
7938 0F BB = btcw r16, rm16 and btcl r32, rm32
   0F BA /7 = btcw $imm8,  rm16 and btcl $imm8,  rm32
7940
7941 Same for BTS, BTR
7942 */
can_be_used_with_LOCK_prefix(UChar * opc)7943 static Bool can_be_used_with_LOCK_prefix ( UChar* opc )
7944 {
7945 switch (opc[0]) {
7946 case 0x00: case 0x01: case 0x08: case 0x09:
7947 case 0x10: case 0x11: case 0x18: case 0x19:
7948 case 0x20: case 0x21: case 0x28: case 0x29:
7949 case 0x30: case 0x31:
7950 if (!epartIsReg(opc[1]))
7951 return True;
7952 break;
7953
7954 case 0x80: case 0x81: case 0x82: case 0x83:
7955 if (gregOfRM(opc[1]) >= 0 && gregOfRM(opc[1]) <= 6
7956 && !epartIsReg(opc[1]))
7957 return True;
7958 break;
7959
7960 case 0xFE: case 0xFF:
7961 if (gregOfRM(opc[1]) >= 0 && gregOfRM(opc[1]) <= 1
7962 && !epartIsReg(opc[1]))
7963 return True;
7964 break;
7965
7966 case 0xF6: case 0xF7:
7967 if (gregOfRM(opc[1]) >= 2 && gregOfRM(opc[1]) <= 3
7968 && !epartIsReg(opc[1]))
7969 return True;
7970 break;
7971
7972 case 0x86: case 0x87:
7973 if (!epartIsReg(opc[1]))
7974 return True;
7975 break;
7976
7977 case 0x0F: {
7978 switch (opc[1]) {
7979 case 0xBB: case 0xB3: case 0xAB:
7980 if (!epartIsReg(opc[2]))
7981 return True;
7982 break;
7983 case 0xBA:
7984 if (gregOfRM(opc[2]) >= 5 && gregOfRM(opc[2]) <= 7
7985 && !epartIsReg(opc[2]))
7986 return True;
7987 break;
7988 case 0xB0: case 0xB1:
7989 if (!epartIsReg(opc[2]))
7990 return True;
7991 break;
7992 case 0xC7:
7993 if (gregOfRM(opc[2]) == 1 && !epartIsReg(opc[2]) )
7994 return True;
7995 break;
7996 case 0xC0: case 0xC1:
7997 if (!epartIsReg(opc[2]))
7998 return True;
7999 break;
8000 default:
8001 break;
8002 } /* switch (opc[1]) */
8003 break;
8004 }
8005
8006 default:
8007 break;
8008 } /* switch (opc[0]) */
8009
8010 return False;
8011 }
8012
/* Byte-swap the value in t1 (of type ty, which must be Ity_I32 or
   Ity_I16) and return a new temporary holding the result. */
static IRTemp math_BSWAP ( IRTemp t1, IRType ty )
{
   IRTemp t2 = newTemp(ty);
   if (ty == Ity_I32) {
      /* Move each byte to its mirrored position and OR together. */
      IRExpr* byte3 = binop(Iop_Shl32, mkexpr(t1), mkU8(24));
      IRExpr* byte2 = binop(Iop_And32,
                            binop(Iop_Shl32, mkexpr(t1), mkU8(8)),
                            mkU32(0x00FF0000));
      IRExpr* byte1 = binop(Iop_And32,
                            binop(Iop_Shr32, mkexpr(t1), mkU8(8)),
                            mkU32(0x0000FF00));
      IRExpr* byte0 = binop(Iop_And32,
                            binop(Iop_Shr32, mkexpr(t1), mkU8(24)),
                            mkU32(0x000000FF));
      assign( t2,
              binop(Iop_Or32, byte3,
                    binop(Iop_Or32, byte2,
                          binop(Iop_Or32, byte1, byte0))) );
      return t2;
   }
   if (ty == Ity_I16) {
      /* Just exchange the two halves. */
      assign( t2,
              binop(Iop_Or16,
                    binop(Iop_Shl16, mkexpr(t1), mkU8(8)),
                    binop(Iop_Shr16, mkexpr(t1), mkU8(8)) ));
      return t2;
   }
   vassert(0);
   /*NOTREACHED*/
   return IRTemp_INVALID;
}
8045
8046 /*------------------------------------------------------------*/
8047 /*--- Disassemble a single instruction ---*/
8048 /*------------------------------------------------------------*/
8049
8050 /* Disassemble a single instruction into IR. The instruction is
8051 located in host memory at &guest_code[delta]. *expect_CAS is set
8052 to True if the resulting IR is expected to contain an IRCAS
8053 statement, and False if it's not expected to. This makes it
8054 possible for the caller of disInstr_X86_WRK to check that
8055 LOCK-prefixed instructions are at least plausibly translated, in
8056 that it becomes possible to check that a (validly) LOCK-prefixed
8057 instruction generates a translation containing an IRCAS, and
8058 instructions without LOCK prefixes don't generate translations
8059 containing an IRCAS.
8060 */
8061 static
disInstr_X86_WRK(Bool * expect_CAS,Bool (* resteerOkFn)(void *,Addr64),Bool resteerCisOk,void * callback_opaque,Long delta64,VexArchInfo * archinfo,VexAbiInfo * vbi,Bool sigill_diag)8062 DisResult disInstr_X86_WRK (
8063 /*OUT*/Bool* expect_CAS,
8064 Bool (*resteerOkFn) ( /*opaque*/void*, Addr64 ),
8065 Bool resteerCisOk,
8066 void* callback_opaque,
8067 Long delta64,
8068 VexArchInfo* archinfo,
8069 VexAbiInfo* vbi,
8070 Bool sigill_diag
8071 )
8072 {
8073 IRType ty;
8074 IRTemp addr, t0, t1, t2, t3, t4, t5, t6;
8075 Int alen;
8076 UChar opc, modrm, abyte, pre;
8077 UInt d32;
8078 HChar dis_buf[50];
8079 Int am_sz, d_sz, n_prefixes;
8080 DisResult dres;
8081 UChar* insn; /* used in SSE decoders */
8082
8083 /* The running delta */
8084 Int delta = (Int)delta64;
8085
8086 /* Holds eip at the start of the insn, so that we can print
8087 consistent error messages for unimplemented insns. */
8088 Int delta_start = delta;
8089
8090 /* sz denotes the nominal data-op size of the insn; we change it to
8091 2 if an 0x66 prefix is seen */
8092 Int sz = 4;
8093
8094 /* sorb holds the segment-override-prefix byte, if any. Zero if no
8095 prefix has been seen, else one of {0x26, 0x3E, 0x64, 0x65}
8096 indicating the prefix. */
8097 UChar sorb = 0;
8098
8099 /* Gets set to True if a LOCK prefix is seen. */
8100 Bool pfx_lock = False;
8101
8102 /* Set result defaults. */
8103 dres.whatNext = Dis_Continue;
8104 dres.len = 0;
8105 dres.continueAt = 0;
8106 dres.jk_StopHere = Ijk_INVALID;
8107
8108 *expect_CAS = False;
8109
8110 addr = t0 = t1 = t2 = t3 = t4 = t5 = t6 = IRTemp_INVALID;
8111
8112 vassert(guest_EIP_bbstart + delta == guest_EIP_curr_instr);
8113 DIP("\t0x%x: ", guest_EIP_bbstart+delta);
8114
8115 /* Spot "Special" instructions (see comment at top of file). */
8116 {
8117 UChar* code = (UChar*)(guest_code + delta);
8118 /* Spot the 12-byte preamble:
8119 C1C703 roll $3, %edi
8120 C1C70D roll $13, %edi
8121 C1C71D roll $29, %edi
8122 C1C713 roll $19, %edi
8123 */
8124 if (code[ 0] == 0xC1 && code[ 1] == 0xC7 && code[ 2] == 0x03 &&
8125 code[ 3] == 0xC1 && code[ 4] == 0xC7 && code[ 5] == 0x0D &&
8126 code[ 6] == 0xC1 && code[ 7] == 0xC7 && code[ 8] == 0x1D &&
8127 code[ 9] == 0xC1 && code[10] == 0xC7 && code[11] == 0x13) {
8128 /* Got a "Special" instruction preamble. Which one is it? */
8129 if (code[12] == 0x87 && code[13] == 0xDB /* xchgl %ebx,%ebx */) {
8130 /* %EDX = client_request ( %EAX ) */
8131 DIP("%%edx = client_request ( %%eax )\n");
8132 delta += 14;
8133 jmp_lit(&dres, Ijk_ClientReq, guest_EIP_bbstart+delta);
8134 vassert(dres.whatNext == Dis_StopHere);
8135 goto decode_success;
8136 }
8137 else
8138 if (code[12] == 0x87 && code[13] == 0xC9 /* xchgl %ecx,%ecx */) {
8139 /* %EAX = guest_NRADDR */
8140 DIP("%%eax = guest_NRADDR\n");
8141 delta += 14;
8142 putIReg(4, R_EAX, IRExpr_Get( OFFB_NRADDR, Ity_I32 ));
8143 goto decode_success;
8144 }
8145 else
8146 if (code[12] == 0x87 && code[13] == 0xD2 /* xchgl %edx,%edx */) {
8147 /* call-noredir *%EAX */
8148 DIP("call-noredir *%%eax\n");
8149 delta += 14;
8150 t1 = newTemp(Ity_I32);
8151 assign(t1, getIReg(4,R_EAX));
8152 t2 = newTemp(Ity_I32);
8153 assign(t2, binop(Iop_Sub32, getIReg(4,R_ESP), mkU32(4)));
8154 putIReg(4, R_ESP, mkexpr(t2));
8155 storeLE( mkexpr(t2), mkU32(guest_EIP_bbstart+delta));
8156 jmp_treg(&dres, Ijk_NoRedir, t1);
8157 vassert(dres.whatNext == Dis_StopHere);
8158 goto decode_success;
8159 }
8160 else
8161 if (code[12] == 0x87 && code[13] == 0xFF /* xchgl %edi,%edi */) {
8162 /* IR injection */
8163 DIP("IR injection\n");
8164 vex_inject_ir(irsb, Iend_LE);
8165
8166 // Invalidate the current insn. The reason is that the IRop we're
8167 // injecting here can change. In which case the translation has to
8168 // be redone. For ease of handling, we simply invalidate all the
8169 // time.
8170 stmt(IRStmt_Put(OFFB_CMSTART, mkU32(guest_EIP_curr_instr)));
8171 stmt(IRStmt_Put(OFFB_CMLEN, mkU32(14)));
8172
8173 delta += 14;
8174
8175 stmt( IRStmt_Put( OFFB_EIP, mkU32(guest_EIP_bbstart + delta) ) );
8176 dres.whatNext = Dis_StopHere;
8177 dres.jk_StopHere = Ijk_InvalICache;
8178 goto decode_success;
8179 }
8180 /* We don't know what it is. */
8181 goto decode_failure;
8182 /*NOTREACHED*/
8183 }
8184 }
8185
8186 /* Handle a couple of weird-ass NOPs that have been observed in the
8187 wild. */
8188 {
8189 UChar* code = (UChar*)(guest_code + delta);
8190 /* Sun's JVM 1.5.0 uses the following as a NOP:
8191 26 2E 64 65 90 %es:%cs:%fs:%gs:nop */
8192 if (code[0] == 0x26 && code[1] == 0x2E && code[2] == 0x64
8193 && code[3] == 0x65 && code[4] == 0x90) {
8194 DIP("%%es:%%cs:%%fs:%%gs:nop\n");
8195 delta += 5;
8196 goto decode_success;
8197 }
8198 /* Don't barf on recent binutils padding,
8199 all variants of which are: nopw %cs:0x0(%eax,%eax,1)
8200 66 2e 0f 1f 84 00 00 00 00 00
8201 66 66 2e 0f 1f 84 00 00 00 00 00
8202 66 66 66 2e 0f 1f 84 00 00 00 00 00
8203 66 66 66 66 2e 0f 1f 84 00 00 00 00 00
8204 66 66 66 66 66 2e 0f 1f 84 00 00 00 00 00
8205 66 66 66 66 66 66 2e 0f 1f 84 00 00 00 00 00
8206 */
8207 if (code[0] == 0x66) {
8208 Int data16_cnt;
8209 for (data16_cnt = 1; data16_cnt < 6; data16_cnt++)
8210 if (code[data16_cnt] != 0x66)
8211 break;
8212 if (code[data16_cnt] == 0x2E && code[data16_cnt + 1] == 0x0F
8213 && code[data16_cnt + 2] == 0x1F && code[data16_cnt + 3] == 0x84
8214 && code[data16_cnt + 4] == 0x00 && code[data16_cnt + 5] == 0x00
8215 && code[data16_cnt + 6] == 0x00 && code[data16_cnt + 7] == 0x00
8216 && code[data16_cnt + 8] == 0x00 ) {
8217 DIP("nopw %%cs:0x0(%%eax,%%eax,1)\n");
8218 delta += 9 + data16_cnt;
8219 goto decode_success;
8220 }
8221 }
8222 }
8223
8224 /* Normal instruction handling starts here. */
8225
8226 /* Deal with some but not all prefixes:
8227 66(oso)
8228 F0(lock)
8229 2E(cs:) 3E(ds:) 26(es:) 64(fs:) 65(gs:) 36(ss:)
8230 Not dealt with (left in place):
8231 F2 F3
8232 */
8233 n_prefixes = 0;
8234 while (True) {
8235 if (n_prefixes > 7) goto decode_failure;
8236 pre = getUChar(delta);
8237 switch (pre) {
8238 case 0x66:
8239 sz = 2;
8240 break;
8241 case 0xF0:
8242 pfx_lock = True;
8243 *expect_CAS = True;
8244 break;
8245 case 0x3E: /* %DS: */
8246 case 0x26: /* %ES: */
8247 case 0x64: /* %FS: */
8248 case 0x65: /* %GS: */
8249 if (sorb != 0)
8250 goto decode_failure; /* only one seg override allowed */
8251 sorb = pre;
8252 break;
8253 case 0x2E: { /* %CS: */
8254 /* 2E prefix on a conditional branch instruction is a
8255 branch-prediction hint, which can safely be ignored. */
8256 UChar op1 = getIByte(delta+1);
8257 UChar op2 = getIByte(delta+2);
8258 if ((op1 >= 0x70 && op1 <= 0x7F)
8259 || (op1 == 0xE3)
8260 || (op1 == 0x0F && op2 >= 0x80 && op2 <= 0x8F)) {
8261 if (0) vex_printf("vex x86->IR: ignoring branch hint\n");
8262 } else {
8263 /* All other CS override cases are not handled */
8264 goto decode_failure;
8265 }
8266 break;
8267 }
8268 case 0x36: /* %SS: */
8269 /* SS override cases are not handled */
8270 goto decode_failure;
8271 default:
8272 goto not_a_prefix;
8273 }
8274 n_prefixes++;
8275 delta++;
8276 }
8277
8278 not_a_prefix:
8279
8280 /* Now we should be looking at the primary opcode byte or the
8281 leading F2 or F3. Check that any LOCK prefix is actually
8282 allowed. */
8283
8284 if (pfx_lock) {
8285 if (can_be_used_with_LOCK_prefix( (UChar*)&guest_code[delta] )) {
8286 DIP("lock ");
8287 } else {
8288 *expect_CAS = False;
8289 goto decode_failure;
8290 }
8291 }
8292
8293
8294 /* ---------------------------------------------------- */
8295 /* --- The SSE decoder. --- */
8296 /* ---------------------------------------------------- */
8297
8298 /* What did I do to deserve SSE ? Perhaps I was really bad in a
8299 previous life? */
8300
8301 /* Note, this doesn't handle SSE2 or SSE3. That is handled in a
8302 later section, further on. */
8303
8304 insn = (UChar*)&guest_code[delta];
8305
8306 /* Treat fxsave specially. It should be doable even on an SSE0
8307 (Pentium-II class) CPU. Hence be prepared to handle it on
8308 any subarchitecture variant.
8309 */
8310
8311 /* 0F AE /0 = FXSAVE m512 -- write x87 and SSE state to memory */
8312 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xAE
8313 && !epartIsReg(insn[2]) && gregOfRM(insn[2]) == 0) {
8314 IRDirty* d;
8315 modrm = getIByte(delta+2);
8316 vassert(sz == 4);
8317 vassert(!epartIsReg(modrm));
8318
8319 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
8320 delta += 2+alen;
8321 gen_SEGV_if_not_16_aligned(addr);
8322
8323 DIP("fxsave %s\n", dis_buf);
8324
      /* Uses dirty helper:
            void x86g_dirtyhelper_FXSAVE ( VexGuestX86State*, UInt ) */
8327 d = unsafeIRDirty_0_N (
8328 0/*regparms*/,
8329 "x86g_dirtyhelper_FXSAVE",
8330 &x86g_dirtyhelper_FXSAVE,
8331 mkIRExprVec_2( IRExpr_BBPTR(), mkexpr(addr) )
8332 );
8333
8334 /* declare we're writing memory */
8335 d->mFx = Ifx_Write;
8336 d->mAddr = mkexpr(addr);
8337 d->mSize = 464; /* according to recent Intel docs */
8338
8339 /* declare we're reading guest state */
8340 d->nFxState = 7;
8341 vex_bzero(&d->fxState, sizeof(d->fxState));
8342
8343 d->fxState[0].fx = Ifx_Read;
8344 d->fxState[0].offset = OFFB_FTOP;
8345 d->fxState[0].size = sizeof(UInt);
8346
8347 d->fxState[1].fx = Ifx_Read;
8348 d->fxState[1].offset = OFFB_FPREGS;
8349 d->fxState[1].size = 8 * sizeof(ULong);
8350
8351 d->fxState[2].fx = Ifx_Read;
8352 d->fxState[2].offset = OFFB_FPTAGS;
8353 d->fxState[2].size = 8 * sizeof(UChar);
8354
8355 d->fxState[3].fx = Ifx_Read;
8356 d->fxState[3].offset = OFFB_FPROUND;
8357 d->fxState[3].size = sizeof(UInt);
8358
8359 d->fxState[4].fx = Ifx_Read;
8360 d->fxState[4].offset = OFFB_FC3210;
8361 d->fxState[4].size = sizeof(UInt);
8362
8363 d->fxState[5].fx = Ifx_Read;
8364 d->fxState[5].offset = OFFB_XMM0;
8365 d->fxState[5].size = 8 * sizeof(U128);
8366
8367 d->fxState[6].fx = Ifx_Read;
8368 d->fxState[6].offset = OFFB_SSEROUND;
8369 d->fxState[6].size = sizeof(UInt);
8370
8371 /* Be paranoid ... this assertion tries to ensure the 8 %xmm
8372 images are packed back-to-back. If not, the value of
8373 d->fxState[5].size is wrong. */
8374 vassert(16 == sizeof(U128));
8375 vassert(OFFB_XMM7 == (OFFB_XMM0 + 7 * 16));
8376
8377 stmt( IRStmt_Dirty(d) );
8378
8379 goto decode_success;
8380 }
8381
8382 /* 0F AE /1 = FXRSTOR m512 -- read x87 and SSE state from memory */
8383 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xAE
8384 && !epartIsReg(insn[2]) && gregOfRM(insn[2]) == 1) {
8385 IRDirty* d;
8386 modrm = getIByte(delta+2);
8387 vassert(sz == 4);
8388 vassert(!epartIsReg(modrm));
8389
8390 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
8391 delta += 2+alen;
8392 gen_SEGV_if_not_16_aligned(addr);
8393
8394 DIP("fxrstor %s\n", dis_buf);
8395
      /* Uses dirty helper:
            VexEmNote x86g_dirtyhelper_FXRSTOR ( VexGuestX86State*, UInt )
         NOTE:
            the VexEmNote value is simply ignored (unlike for FRSTOR)
      */
8401 d = unsafeIRDirty_0_N (
8402 0/*regparms*/,
8403 "x86g_dirtyhelper_FXRSTOR",
8404 &x86g_dirtyhelper_FXRSTOR,
8405 mkIRExprVec_2( IRExpr_BBPTR(), mkexpr(addr) )
8406 );
8407
8408 /* declare we're reading memory */
8409 d->mFx = Ifx_Read;
8410 d->mAddr = mkexpr(addr);
8411 d->mSize = 464; /* according to recent Intel docs */
8412
8413 /* declare we're writing guest state */
8414 d->nFxState = 7;
8415 vex_bzero(&d->fxState, sizeof(d->fxState));
8416
8417 d->fxState[0].fx = Ifx_Write;
8418 d->fxState[0].offset = OFFB_FTOP;
8419 d->fxState[0].size = sizeof(UInt);
8420
8421 d->fxState[1].fx = Ifx_Write;
8422 d->fxState[1].offset = OFFB_FPREGS;
8423 d->fxState[1].size = 8 * sizeof(ULong);
8424
8425 d->fxState[2].fx = Ifx_Write;
8426 d->fxState[2].offset = OFFB_FPTAGS;
8427 d->fxState[2].size = 8 * sizeof(UChar);
8428
8429 d->fxState[3].fx = Ifx_Write;
8430 d->fxState[3].offset = OFFB_FPROUND;
8431 d->fxState[3].size = sizeof(UInt);
8432
8433 d->fxState[4].fx = Ifx_Write;
8434 d->fxState[4].offset = OFFB_FC3210;
8435 d->fxState[4].size = sizeof(UInt);
8436
8437 d->fxState[5].fx = Ifx_Write;
8438 d->fxState[5].offset = OFFB_XMM0;
8439 d->fxState[5].size = 8 * sizeof(U128);
8440
8441 d->fxState[6].fx = Ifx_Write;
8442 d->fxState[6].offset = OFFB_SSEROUND;
8443 d->fxState[6].size = sizeof(UInt);
8444
8445 /* Be paranoid ... this assertion tries to ensure the 8 %xmm
8446 images are packed back-to-back. If not, the value of
8447 d->fxState[5].size is wrong. */
8448 vassert(16 == sizeof(U128));
8449 vassert(OFFB_XMM7 == (OFFB_XMM0 + 7 * 16));
8450
8451 stmt( IRStmt_Dirty(d) );
8452
8453 goto decode_success;
8454 }
8455
8456 /* ------ SSE decoder main ------ */
8457
8458 /* Skip parts of the decoder which don't apply given the stated
8459 guest subarchitecture. */
8460 if (archinfo->hwcaps == 0/*baseline, no sse at all*/)
8461 goto after_sse_decoders;
8462
8463 /* With mmxext only some extended MMX instructions are recognized.
8464 The mmxext instructions are MASKMOVQ MOVNTQ PAVGB PAVGW PMAXSW
8465 PMAXUB PMINSW PMINUB PMULHUW PSADBW PSHUFW PEXTRW PINSRW PMOVMSKB
8466 PREFETCHNTA PREFETCHT0 PREFETCHT1 PREFETCHT2 SFENCE
8467
8468 http://support.amd.com/us/Embedded_TechDocs/22466.pdf
8469 https://en.wikipedia.org/wiki/3DNow!#3DNow.21_extensions */
8470
8471 if (archinfo->hwcaps == VEX_HWCAPS_X86_MMXEXT/*integer only sse1 subset*/)
8472 goto mmxext;
8473
8474 /* Otherwise we must be doing sse1 or sse2, so we can at least try
8475 for SSE1 here. */
8476
8477 /* 0F 58 = ADDPS -- add 32Fx4 from R/M to R */
8478 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x58) {
8479 delta = dis_SSE_E_to_G_all( sorb, delta+2, "addps", Iop_Add32Fx4 );
8480 goto decode_success;
8481 }
8482
8483 /* F3 0F 58 = ADDSS -- add 32F0x4 from R/M to R */
8484 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x58) {
8485 vassert(sz == 4);
8486 delta = dis_SSE_E_to_G_lo32( sorb, delta+3, "addss", Iop_Add32F0x4 );
8487 goto decode_success;
8488 }
8489
8490 /* 0F 55 = ANDNPS -- G = (not G) and E */
8491 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x55) {
8492 delta = dis_SSE_E_to_G_all_invG( sorb, delta+2, "andnps", Iop_AndV128 );
8493 goto decode_success;
8494 }
8495
8496 /* 0F 54 = ANDPS -- G = G and E */
8497 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x54) {
8498 delta = dis_SSE_E_to_G_all( sorb, delta+2, "andps", Iop_AndV128 );
8499 goto decode_success;
8500 }
8501
8502 /* 0F C2 = CMPPS -- 32Fx4 comparison from R/M to R */
8503 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xC2) {
8504 delta = dis_SSEcmp_E_to_G( sorb, delta+2, "cmpps", True, 4 );
8505 goto decode_success;
8506 }
8507
8508 /* F3 0F C2 = CMPSS -- 32F0x4 comparison from R/M to R */
8509 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0xC2) {
8510 vassert(sz == 4);
8511 delta = dis_SSEcmp_E_to_G( sorb, delta+3, "cmpss", False, 4 );
8512 goto decode_success;
8513 }
8514
8515 /* 0F 2F = COMISS -- 32F0x4 comparison G,E, and set ZCP */
8516 /* 0F 2E = UCOMISS -- 32F0x4 comparison G,E, and set ZCP */
8517 if (sz == 4 && insn[0] == 0x0F && (insn[1] == 0x2F || insn[1] == 0x2E)) {
8518 IRTemp argL = newTemp(Ity_F32);
8519 IRTemp argR = newTemp(Ity_F32);
8520 modrm = getIByte(delta+2);
8521 if (epartIsReg(modrm)) {
8522 assign( argR, getXMMRegLane32F( eregOfRM(modrm), 0/*lowest lane*/ ) );
8523 delta += 2+1;
8524 DIP("[u]comiss %s,%s\n", nameXMMReg(eregOfRM(modrm)),
8525 nameXMMReg(gregOfRM(modrm)) );
8526 } else {
8527 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
8528 assign( argR, loadLE(Ity_F32, mkexpr(addr)) );
8529 delta += 2+alen;
8530 DIP("[u]comiss %s,%s\n", dis_buf,
8531 nameXMMReg(gregOfRM(modrm)) );
8532 }
8533 assign( argL, getXMMRegLane32F( gregOfRM(modrm), 0/*lowest lane*/ ) );
8534
8535 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(X86G_CC_OP_COPY) ));
8536 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) ));
8537 stmt( IRStmt_Put(
8538 OFFB_CC_DEP1,
8539 binop( Iop_And32,
8540 binop(Iop_CmpF64,
8541 unop(Iop_F32toF64,mkexpr(argL)),
8542 unop(Iop_F32toF64,mkexpr(argR))),
8543 mkU32(0x45)
8544 )));
8545 /* Set NDEP even though it isn't used. This makes redundant-PUT
8546 elimination of previous stores to this field work better. */
8547 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
8548 goto decode_success;
8549 }
8550
8551 /* 0F 2A = CVTPI2PS -- convert 2 x I32 in mem/mmx to 2 x F32 in low
8552 half xmm */
8553 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x2A) {
8554 IRTemp arg64 = newTemp(Ity_I64);
8555 IRTemp rmode = newTemp(Ity_I32);
8556 vassert(sz == 4);
8557
8558 modrm = getIByte(delta+2);
8559 do_MMX_preamble();
8560 if (epartIsReg(modrm)) {
8561 assign( arg64, getMMXReg(eregOfRM(modrm)) );
8562 delta += 2+1;
8563 DIP("cvtpi2ps %s,%s\n", nameMMXReg(eregOfRM(modrm)),
8564 nameXMMReg(gregOfRM(modrm)));
8565 } else {
8566 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
8567 assign( arg64, loadLE(Ity_I64, mkexpr(addr)) );
8568 delta += 2+alen;
8569 DIP("cvtpi2ps %s,%s\n", dis_buf,
8570 nameXMMReg(gregOfRM(modrm)) );
8571 }
8572
8573 assign( rmode, get_sse_roundingmode() );
8574
8575 putXMMRegLane32F(
8576 gregOfRM(modrm), 0,
8577 binop(Iop_F64toF32,
8578 mkexpr(rmode),
8579 unop(Iop_I32StoF64,
8580 unop(Iop_64to32, mkexpr(arg64)) )) );
8581
8582 putXMMRegLane32F(
8583 gregOfRM(modrm), 1,
8584 binop(Iop_F64toF32,
8585 mkexpr(rmode),
8586 unop(Iop_I32StoF64,
8587 unop(Iop_64HIto32, mkexpr(arg64)) )) );
8588
8589 goto decode_success;
8590 }
8591
8592 /* F3 0F 2A = CVTSI2SS -- convert I32 in mem/ireg to F32 in low
8593 quarter xmm */
8594 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x2A) {
8595 IRTemp arg32 = newTemp(Ity_I32);
8596 IRTemp rmode = newTemp(Ity_I32);
8597 vassert(sz == 4);
8598
8599 modrm = getIByte(delta+3);
8600 if (epartIsReg(modrm)) {
8601 assign( arg32, getIReg(4, eregOfRM(modrm)) );
8602 delta += 3+1;
8603 DIP("cvtsi2ss %s,%s\n", nameIReg(4, eregOfRM(modrm)),
8604 nameXMMReg(gregOfRM(modrm)));
8605 } else {
8606 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
8607 assign( arg32, loadLE(Ity_I32, mkexpr(addr)) );
8608 delta += 3+alen;
8609 DIP("cvtsi2ss %s,%s\n", dis_buf,
8610 nameXMMReg(gregOfRM(modrm)) );
8611 }
8612
8613 assign( rmode, get_sse_roundingmode() );
8614
8615 putXMMRegLane32F(
8616 gregOfRM(modrm), 0,
8617 binop(Iop_F64toF32,
8618 mkexpr(rmode),
8619 unop(Iop_I32StoF64, mkexpr(arg32)) ) );
8620
8621 goto decode_success;
8622 }
8623
8624 /* 0F 2D = CVTPS2PI -- convert 2 x F32 in mem/low half xmm to 2 x
8625 I32 in mmx, according to prevailing SSE rounding mode */
8626 /* 0F 2C = CVTTPS2PI -- convert 2 x F32 in mem/low half xmm to 2 x
8627 I32 in mmx, rounding towards zero */
8628 if (sz == 4 && insn[0] == 0x0F && (insn[1] == 0x2D || insn[1] == 0x2C)) {
8629 IRTemp dst64 = newTemp(Ity_I64);
8630 IRTemp rmode = newTemp(Ity_I32);
8631 IRTemp f32lo = newTemp(Ity_F32);
8632 IRTemp f32hi = newTemp(Ity_F32);
8633 Bool r2zero = toBool(insn[1] == 0x2C);
8634
8635 do_MMX_preamble();
8636 modrm = getIByte(delta+2);
8637
8638 if (epartIsReg(modrm)) {
8639 delta += 2+1;
8640 assign(f32lo, getXMMRegLane32F(eregOfRM(modrm), 0));
8641 assign(f32hi, getXMMRegLane32F(eregOfRM(modrm), 1));
8642 DIP("cvt%sps2pi %s,%s\n", r2zero ? "t" : "",
8643 nameXMMReg(eregOfRM(modrm)),
8644 nameMMXReg(gregOfRM(modrm)));
8645 } else {
8646 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
8647 assign(f32lo, loadLE(Ity_F32, mkexpr(addr)));
8648 assign(f32hi, loadLE(Ity_F32, binop( Iop_Add32,
8649 mkexpr(addr),
8650 mkU32(4) )));
8651 delta += 2+alen;
8652 DIP("cvt%sps2pi %s,%s\n", r2zero ? "t" : "",
8653 dis_buf,
8654 nameMMXReg(gregOfRM(modrm)));
8655 }
8656
8657 if (r2zero) {
8658 assign(rmode, mkU32((UInt)Irrm_ZERO) );
8659 } else {
8660 assign( rmode, get_sse_roundingmode() );
8661 }
8662
8663 assign(
8664 dst64,
8665 binop( Iop_32HLto64,
8666 binop( Iop_F64toI32S,
8667 mkexpr(rmode),
8668 unop( Iop_F32toF64, mkexpr(f32hi) ) ),
8669 binop( Iop_F64toI32S,
8670 mkexpr(rmode),
8671 unop( Iop_F32toF64, mkexpr(f32lo) ) )
8672 )
8673 );
8674
8675 putMMXReg(gregOfRM(modrm), mkexpr(dst64));
8676 goto decode_success;
8677 }
8678
8679 /* F3 0F 2D = CVTSS2SI -- convert F32 in mem/low quarter xmm to
8680 I32 in ireg, according to prevailing SSE rounding mode */
8681 /* F3 0F 2C = CVTTSS2SI -- convert F32 in mem/low quarter xmm to
8682 I32 in ireg, rounding towards zero */
8683 if (insn[0] == 0xF3 && insn[1] == 0x0F
8684 && (insn[2] == 0x2D || insn[2] == 0x2C)) {
8685 IRTemp rmode = newTemp(Ity_I32);
8686 IRTemp f32lo = newTemp(Ity_F32);
8687 Bool r2zero = toBool(insn[2] == 0x2C);
8688 vassert(sz == 4);
8689
8690 modrm = getIByte(delta+3);
8691 if (epartIsReg(modrm)) {
8692 delta += 3+1;
8693 assign(f32lo, getXMMRegLane32F(eregOfRM(modrm), 0));
8694 DIP("cvt%sss2si %s,%s\n", r2zero ? "t" : "",
8695 nameXMMReg(eregOfRM(modrm)),
8696 nameIReg(4, gregOfRM(modrm)));
8697 } else {
8698 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
8699 assign(f32lo, loadLE(Ity_F32, mkexpr(addr)));
8700 delta += 3+alen;
8701 DIP("cvt%sss2si %s,%s\n", r2zero ? "t" : "",
8702 dis_buf,
8703 nameIReg(4, gregOfRM(modrm)));
8704 }
8705
8706 if (r2zero) {
8707 assign( rmode, mkU32((UInt)Irrm_ZERO) );
8708 } else {
8709 assign( rmode, get_sse_roundingmode() );
8710 }
8711
8712 putIReg(4, gregOfRM(modrm),
8713 binop( Iop_F64toI32S,
8714 mkexpr(rmode),
8715 unop( Iop_F32toF64, mkexpr(f32lo) ) )
8716 );
8717
8718 goto decode_success;
8719 }
8720
8721 /* 0F 5E = DIVPS -- div 32Fx4 from R/M to R */
8722 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x5E) {
8723 delta = dis_SSE_E_to_G_all( sorb, delta+2, "divps", Iop_Div32Fx4 );
8724 goto decode_success;
8725 }
8726
8727 /* F3 0F 5E = DIVSS -- div 32F0x4 from R/M to R */
8728 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x5E) {
8729 vassert(sz == 4);
8730 delta = dis_SSE_E_to_G_lo32( sorb, delta+3, "divss", Iop_Div32F0x4 );
8731 goto decode_success;
8732 }
8733
8734 /* 0F AE /2 = LDMXCSR m32 -- load %mxcsr */
8735 if (insn[0] == 0x0F && insn[1] == 0xAE
8736 && !epartIsReg(insn[2]) && gregOfRM(insn[2]) == 2) {
8737
8738 IRTemp t64 = newTemp(Ity_I64);
8739 IRTemp ew = newTemp(Ity_I32);
8740
8741 modrm = getIByte(delta+2);
8742 vassert(!epartIsReg(modrm));
8743 vassert(sz == 4);
8744
8745 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
8746 delta += 2+alen;
8747 DIP("ldmxcsr %s\n", dis_buf);
8748
8749 /* The only thing we observe in %mxcsr is the rounding mode.
8750 Therefore, pass the 32-bit value (SSE native-format control
8751 word) to a clean helper, getting back a 64-bit value, the
8752 lower half of which is the SSEROUND value to store, and the
8753 upper half of which is the emulation-warning token which may
8754 be generated.
8755 */
      /* ULong x86g_check_ldmxcsr ( UInt ); */
8757 assign( t64, mkIRExprCCall(
8758 Ity_I64, 0/*regparms*/,
8759 "x86g_check_ldmxcsr",
8760 &x86g_check_ldmxcsr,
8761 mkIRExprVec_1( loadLE(Ity_I32, mkexpr(addr)) )
8762 )
8763 );
8764
8765 put_sse_roundingmode( unop(Iop_64to32, mkexpr(t64)) );
8766 assign( ew, unop(Iop_64HIto32, mkexpr(t64) ) );
8767 put_emwarn( mkexpr(ew) );
8768 /* Finally, if an emulation warning was reported, side-exit to
8769 the next insn, reporting the warning, so that Valgrind's
8770 dispatcher sees the warning. */
8771 stmt(
8772 IRStmt_Exit(
8773 binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)),
8774 Ijk_EmWarn,
8775 IRConst_U32( ((Addr32)guest_EIP_bbstart)+delta),
8776 OFFB_EIP
8777 )
8778 );
8779 goto decode_success;
8780 }
8781
8782
8783 /* mmxext sse1 subset starts here. mmxext only arches will parse
8784 only this subset of the sse1 instructions. */
8785 mmxext:
8786
8787 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
8788 /* 0F F7 = MASKMOVQ -- 8x8 masked store */
8789 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xF7) {
8790 Bool ok = False;
8791 delta = dis_MMX( &ok, sorb, sz, delta+1 );
8792 if (!ok)
8793 goto decode_failure;
8794 goto decode_success;
8795 }
8796
8797 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
8798 /* 0F E7 = MOVNTQ -- for us, just a plain MMX store. Note, the
8799 Intel manual does not say anything about the usual business of
8800 the FP reg tags getting trashed whenever an MMX insn happens.
8801 So we just leave them alone.
8802 */
8803 if (insn[0] == 0x0F && insn[1] == 0xE7) {
8804 modrm = getIByte(delta+2);
8805 if (sz == 4 && !epartIsReg(modrm)) {
8806 /* do_MMX_preamble(); Intel docs don't specify this */
8807 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
8808 storeLE( mkexpr(addr), getMMXReg(gregOfRM(modrm)) );
8809 DIP("movntq %s,%s\n", dis_buf,
8810 nameMMXReg(gregOfRM(modrm)));
8811 delta += 2+alen;
8812 goto decode_success;
8813 }
8814 /* else fall through */
8815 }
8816
8817 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
8818 /* 0F E0 = PAVGB -- 8x8 unsigned Packed Average, with rounding */
8819 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xE0) {
8820 do_MMX_preamble();
8821 delta = dis_MMXop_regmem_to_reg (
8822 sorb, delta+2, insn[1], "pavgb", False );
8823 goto decode_success;
8824 }
8825
8826 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
8827 /* 0F E3 = PAVGW -- 16x4 unsigned Packed Average, with rounding */
8828 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xE3) {
8829 do_MMX_preamble();
8830 delta = dis_MMXop_regmem_to_reg (
8831 sorb, delta+2, insn[1], "pavgw", False );
8832 goto decode_success;
8833 }
8834
8835 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
8836 /* 0F C5 = PEXTRW -- extract 16-bit field from mmx(E) and put
8837 zero-extend of it in ireg(G). */
8838 if (insn[0] == 0x0F && insn[1] == 0xC5) {
8839 modrm = insn[2];
8840 if (sz == 4 && epartIsReg(modrm)) {
8841 IRTemp sV = newTemp(Ity_I64);
8842 t5 = newTemp(Ity_I16);
8843 do_MMX_preamble();
8844 assign(sV, getMMXReg(eregOfRM(modrm)));
8845 breakup64to16s( sV, &t3, &t2, &t1, &t0 );
8846 switch (insn[3] & 3) {
8847 case 0: assign(t5, mkexpr(t0)); break;
8848 case 1: assign(t5, mkexpr(t1)); break;
8849 case 2: assign(t5, mkexpr(t2)); break;
8850 case 3: assign(t5, mkexpr(t3)); break;
8851 default: vassert(0); /*NOTREACHED*/
8852 }
8853 putIReg(4, gregOfRM(modrm), unop(Iop_16Uto32, mkexpr(t5)));
8854 DIP("pextrw $%d,%s,%s\n",
8855 (Int)insn[3], nameMMXReg(eregOfRM(modrm)),
8856 nameIReg(4,gregOfRM(modrm)));
8857 delta += 4;
8858 goto decode_success;
8859 }
8860 /* else fall through */
8861 }
8862
8863 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
8864 /* 0F C4 = PINSRW -- get 16 bits from E(mem or low half ireg) and
8865 put it into the specified lane of mmx(G). */
8866 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xC4) {
8867 /* Use t0 .. t3 to hold the 4 original 16-bit lanes of the
8868 mmx reg. t4 is the new lane value. t5 is the original
8869 mmx value. t6 is the new mmx value. */
8870 Int lane;
8871 t4 = newTemp(Ity_I16);
8872 t5 = newTemp(Ity_I64);
8873 t6 = newTemp(Ity_I64);
8874 modrm = insn[2];
8875 do_MMX_preamble();
8876
8877 assign(t5, getMMXReg(gregOfRM(modrm)));
8878 breakup64to16s( t5, &t3, &t2, &t1, &t0 );
8879
8880 if (epartIsReg(modrm)) {
8881 assign(t4, getIReg(2, eregOfRM(modrm)));
8882 delta += 3+1;
8883 lane = insn[3+1-1];
8884 DIP("pinsrw $%d,%s,%s\n", (Int)lane,
8885 nameIReg(2,eregOfRM(modrm)),
8886 nameMMXReg(gregOfRM(modrm)));
8887 } else {
8888 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
8889 delta += 3+alen;
8890 lane = insn[3+alen-1];
8891 assign(t4, loadLE(Ity_I16, mkexpr(addr)));
8892 DIP("pinsrw $%d,%s,%s\n", (Int)lane,
8893 dis_buf,
8894 nameMMXReg(gregOfRM(modrm)));
8895 }
8896
8897 switch (lane & 3) {
8898 case 0: assign(t6, mk64from16s(t3,t2,t1,t4)); break;
8899 case 1: assign(t6, mk64from16s(t3,t2,t4,t0)); break;
8900 case 2: assign(t6, mk64from16s(t3,t4,t1,t0)); break;
8901 case 3: assign(t6, mk64from16s(t4,t2,t1,t0)); break;
8902 default: vassert(0); /*NOTREACHED*/
8903 }
8904 putMMXReg(gregOfRM(modrm), mkexpr(t6));
8905 goto decode_success;
8906 }
8907
8908 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
8909 /* 0F EE = PMAXSW -- 16x4 signed max */
8910 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xEE) {
8911 do_MMX_preamble();
8912 delta = dis_MMXop_regmem_to_reg (
8913 sorb, delta+2, insn[1], "pmaxsw", False );
8914 goto decode_success;
8915 }
8916
8917 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
8918 /* 0F DE = PMAXUB -- 8x8 unsigned max */
8919 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xDE) {
8920 do_MMX_preamble();
8921 delta = dis_MMXop_regmem_to_reg (
8922 sorb, delta+2, insn[1], "pmaxub", False );
8923 goto decode_success;
8924 }
8925
8926 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
8927 /* 0F EA = PMINSW -- 16x4 signed min */
8928 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xEA) {
8929 do_MMX_preamble();
8930 delta = dis_MMXop_regmem_to_reg (
8931 sorb, delta+2, insn[1], "pminsw", False );
8932 goto decode_success;
8933 }
8934
8935 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
8936 /* 0F DA = PMINUB -- 8x8 unsigned min */
8937 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xDA) {
8938 do_MMX_preamble();
8939 delta = dis_MMXop_regmem_to_reg (
8940 sorb, delta+2, insn[1], "pminub", False );
8941 goto decode_success;
8942 }
8943
8944 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
8945 /* 0F D7 = PMOVMSKB -- extract sign bits from each of 8 lanes in
8946 mmx(E), turn them into a byte, and put zero-extend of it in
8947 ireg(G). */
8948 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xD7) {
8949 modrm = insn[2];
8950 if (epartIsReg(modrm)) {
8951 do_MMX_preamble();
8952 t0 = newTemp(Ity_I64);
8953 t1 = newTemp(Ity_I32);
8954 assign(t0, getMMXReg(eregOfRM(modrm)));
8955 assign(t1, unop(Iop_8Uto32, unop(Iop_GetMSBs8x8, mkexpr(t0))));
8956 putIReg(4, gregOfRM(modrm), mkexpr(t1));
8957 DIP("pmovmskb %s,%s\n", nameMMXReg(eregOfRM(modrm)),
8958 nameIReg(4,gregOfRM(modrm)));
8959 delta += 3;
8960 goto decode_success;
8961 }
8962 /* else fall through */
8963 }
8964
8965 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
   /* 0F E4 = PMULHUW -- 16x4 hi-half of unsigned widening multiply */
8967 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xE4) {
8968 do_MMX_preamble();
8969 delta = dis_MMXop_regmem_to_reg (
8970 sorb, delta+2, insn[1], "pmuluh", False );
8971 goto decode_success;
8972 }
8973
   /* 0F 18 /0 = PREFETCHNTA -- prefetch into caches, */
   /* 0F 18 /1 = PREFETCHT0 -- with various different hints */
   /* 0F 18 /2 = PREFETCHT1 */
   /* 0F 18 /3 = PREFETCHT2 */
8978 if (insn[0] == 0x0F && insn[1] == 0x18
8979 && !epartIsReg(insn[2])
8980 && gregOfRM(insn[2]) >= 0 && gregOfRM(insn[2]) <= 3) {
8981 const HChar* hintstr = "??";
8982
8983 modrm = getIByte(delta+2);
8984 vassert(!epartIsReg(modrm));
8985
8986 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
8987 delta += 2+alen;
8988
8989 switch (gregOfRM(modrm)) {
8990 case 0: hintstr = "nta"; break;
8991 case 1: hintstr = "t0"; break;
8992 case 2: hintstr = "t1"; break;
8993 case 3: hintstr = "t2"; break;
8994 default: vassert(0); /*NOTREACHED*/
8995 }
8996
8997 DIP("prefetch%s %s\n", hintstr, dis_buf);
8998 goto decode_success;
8999 }
9000
9001 /* 0F 0D /0 = PREFETCH m8 -- 3DNow! prefetch */
9002 /* 0F 0D /1 = PREFETCHW m8 -- ditto, with some other hint */
9003 if (insn[0] == 0x0F && insn[1] == 0x0D
9004 && !epartIsReg(insn[2])
9005 && gregOfRM(insn[2]) >= 0 && gregOfRM(insn[2]) <= 1) {
9006 const HChar* hintstr = "??";
9007
9008 modrm = getIByte(delta+2);
9009 vassert(!epartIsReg(modrm));
9010
9011 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
9012 delta += 2+alen;
9013
9014 switch (gregOfRM(modrm)) {
9015 case 0: hintstr = ""; break;
9016 case 1: hintstr = "w"; break;
9017 default: vassert(0); /*NOTREACHED*/
9018 }
9019
9020 DIP("prefetch%s %s\n", hintstr, dis_buf);
9021 goto decode_success;
9022 }
9023
9024 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
9025 /* 0F F6 = PSADBW -- sum of 8Ux8 absolute differences */
9026 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xF6) {
9027 do_MMX_preamble();
9028 delta = dis_MMXop_regmem_to_reg (
9029 sorb, delta+2, insn[1], "psadbw", False );
9030 goto decode_success;
9031 }
9032
9033 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
9034 /* 0F 70 = PSHUFW -- rearrange 4x16 from E(mmx or mem) to G(mmx) */
9035 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x70) {
9036 Int order;
9037 IRTemp sV, dV, s3, s2, s1, s0;
9038 s3 = s2 = s1 = s0 = IRTemp_INVALID;
9039 sV = newTemp(Ity_I64);
9040 dV = newTemp(Ity_I64);
9041 do_MMX_preamble();
9042 modrm = insn[2];
9043 if (epartIsReg(modrm)) {
9044 assign( sV, getMMXReg(eregOfRM(modrm)) );
9045 order = (Int)insn[3];
9046 delta += 2+2;
9047 DIP("pshufw $%d,%s,%s\n", order,
9048 nameMMXReg(eregOfRM(modrm)),
9049 nameMMXReg(gregOfRM(modrm)));
9050 } else {
9051 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
9052 assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
9053 order = (Int)insn[2+alen];
9054 delta += 3+alen;
9055 DIP("pshufw $%d,%s,%s\n", order,
9056 dis_buf,
9057 nameMMXReg(gregOfRM(modrm)));
9058 }
9059 breakup64to16s( sV, &s3, &s2, &s1, &s0 );
9060
9061 # define SEL(n) \
9062 ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3)))
9063 assign(dV,
9064 mk64from16s( SEL((order>>6)&3), SEL((order>>4)&3),
9065 SEL((order>>2)&3), SEL((order>>0)&3) )
9066 );
9067 putMMXReg(gregOfRM(modrm), mkexpr(dV));
9068 # undef SEL
9069 goto decode_success;
9070 }
9071
9072 /* 0F AE /7 = SFENCE -- flush pending operations to memory */
9073 if (insn[0] == 0x0F && insn[1] == 0xAE
9074 && epartIsReg(insn[2]) && gregOfRM(insn[2]) == 7) {
9075 vassert(sz == 4);
9076 delta += 3;
9077 /* Insert a memory fence. It's sometimes important that these
9078 are carried through to the generated code. */
9079 stmt( IRStmt_MBE(Imbe_Fence) );
9080 DIP("sfence\n");
9081 goto decode_success;
9082 }
9083
9084 /* End of mmxext sse1 subset. No more sse parsing for mmxext only arches. */
9085 if (archinfo->hwcaps == VEX_HWCAPS_X86_MMXEXT/*integer only sse1 subset*/)
9086 goto after_sse_decoders;
9087
9088
9089 /* 0F 5F = MAXPS -- max 32Fx4 from R/M to R */
9090 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x5F) {
9091 delta = dis_SSE_E_to_G_all( sorb, delta+2, "maxps", Iop_Max32Fx4 );
9092 goto decode_success;
9093 }
9094
9095 /* F3 0F 5F = MAXSS -- max 32F0x4 from R/M to R */
9096 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x5F) {
9097 vassert(sz == 4);
9098 delta = dis_SSE_E_to_G_lo32( sorb, delta+3, "maxss", Iop_Max32F0x4 );
9099 goto decode_success;
9100 }
9101
9102 /* 0F 5D = MINPS -- min 32Fx4 from R/M to R */
9103 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x5D) {
9104 delta = dis_SSE_E_to_G_all( sorb, delta+2, "minps", Iop_Min32Fx4 );
9105 goto decode_success;
9106 }
9107
9108 /* F3 0F 5D = MINSS -- min 32F0x4 from R/M to R */
9109 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x5D) {
9110 vassert(sz == 4);
9111 delta = dis_SSE_E_to_G_lo32( sorb, delta+3, "minss", Iop_Min32F0x4 );
9112 goto decode_success;
9113 }
9114
9115 /* 0F 28 = MOVAPS -- move from E (mem or xmm) to G (xmm). */
9116 /* 0F 10 = MOVUPS -- move from E (mem or xmm) to G (xmm). */
9117 if (sz == 4 && insn[0] == 0x0F && (insn[1] == 0x28 || insn[1] == 0x10)) {
9118 modrm = getIByte(delta+2);
9119 if (epartIsReg(modrm)) {
9120 putXMMReg( gregOfRM(modrm),
9121 getXMMReg( eregOfRM(modrm) ));
9122 DIP("mov[ua]ps %s,%s\n", nameXMMReg(eregOfRM(modrm)),
9123 nameXMMReg(gregOfRM(modrm)));
9124 delta += 2+1;
9125 } else {
9126 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
9127 if (insn[1] == 0x28/*movaps*/)
9128 gen_SEGV_if_not_16_aligned( addr );
9129 putXMMReg( gregOfRM(modrm),
9130 loadLE(Ity_V128, mkexpr(addr)) );
9131 DIP("mov[ua]ps %s,%s\n", dis_buf,
9132 nameXMMReg(gregOfRM(modrm)));
9133 delta += 2+alen;
9134 }
9135 goto decode_success;
9136 }
9137
9138 /* 0F 29 = MOVAPS -- move from G (xmm) to E (mem or xmm). */
9139 /* 0F 11 = MOVUPS -- move from G (xmm) to E (mem or xmm). */
9140 if (sz == 4 && insn[0] == 0x0F
9141 && (insn[1] == 0x29 || insn[1] == 0x11)) {
9142 modrm = getIByte(delta+2);
9143 if (epartIsReg(modrm)) {
9144 /* fall through; awaiting test case */
9145 } else {
9146 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
9147 if (insn[1] == 0x29/*movaps*/)
9148 gen_SEGV_if_not_16_aligned( addr );
9149 storeLE( mkexpr(addr), getXMMReg(gregOfRM(modrm)) );
9150 DIP("mov[ua]ps %s,%s\n", nameXMMReg(gregOfRM(modrm)),
9151 dis_buf );
9152 delta += 2+alen;
9153 goto decode_success;
9154 }
9155 }
9156
9157 /* 0F 16 = MOVHPS -- move from mem to high half of XMM. */
9158 /* 0F 16 = MOVLHPS -- move from lo half to hi half of XMM. */
9159 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x16) {
9160 modrm = getIByte(delta+2);
9161 if (epartIsReg(modrm)) {
9162 delta += 2+1;
9163 putXMMRegLane64( gregOfRM(modrm), 1/*upper lane*/,
9164 getXMMRegLane64( eregOfRM(modrm), 0 ) );
9165 DIP("movhps %s,%s\n", nameXMMReg(eregOfRM(modrm)),
9166 nameXMMReg(gregOfRM(modrm)));
9167 } else {
9168 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
9169 delta += 2+alen;
9170 putXMMRegLane64( gregOfRM(modrm), 1/*upper lane*/,
9171 loadLE(Ity_I64, mkexpr(addr)) );
9172 DIP("movhps %s,%s\n", dis_buf,
9173 nameXMMReg( gregOfRM(modrm) ));
9174 }
9175 goto decode_success;
9176 }
9177
9178 /* 0F 17 = MOVHPS -- move from high half of XMM to mem. */
9179 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x17) {
9180 if (!epartIsReg(insn[2])) {
9181 delta += 2;
9182 addr = disAMode ( &alen, sorb, delta, dis_buf );
9183 delta += alen;
9184 storeLE( mkexpr(addr),
9185 getXMMRegLane64( gregOfRM(insn[2]),
9186 1/*upper lane*/ ) );
9187 DIP("movhps %s,%s\n", nameXMMReg( gregOfRM(insn[2]) ),
9188 dis_buf);
9189 goto decode_success;
9190 }
9191 /* else fall through */
9192 }
9193
9194 /* 0F 12 = MOVLPS -- move from mem to low half of XMM. */
   /* 0F 12 = MOVHLPS -- move from hi half to lo half of XMM. */
9196 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x12) {
9197 modrm = getIByte(delta+2);
9198 if (epartIsReg(modrm)) {
9199 delta += 2+1;
9200 putXMMRegLane64( gregOfRM(modrm),
9201 0/*lower lane*/,
9202 getXMMRegLane64( eregOfRM(modrm), 1 ));
9203 DIP("movhlps %s, %s\n", nameXMMReg(eregOfRM(modrm)),
9204 nameXMMReg(gregOfRM(modrm)));
9205 } else {
9206 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
9207 delta += 2+alen;
9208 putXMMRegLane64( gregOfRM(modrm), 0/*lower lane*/,
9209 loadLE(Ity_I64, mkexpr(addr)) );
9210 DIP("movlps %s, %s\n",
9211 dis_buf, nameXMMReg( gregOfRM(modrm) ));
9212 }
9213 goto decode_success;
9214 }
9215
9216 /* 0F 13 = MOVLPS -- move from low half of XMM to mem. */
9217 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x13) {
9218 if (!epartIsReg(insn[2])) {
9219 delta += 2;
9220 addr = disAMode ( &alen, sorb, delta, dis_buf );
9221 delta += alen;
9222 storeLE( mkexpr(addr),
9223 getXMMRegLane64( gregOfRM(insn[2]),
9224 0/*lower lane*/ ) );
9225 DIP("movlps %s, %s\n", nameXMMReg( gregOfRM(insn[2]) ),
9226 dis_buf);
9227 goto decode_success;
9228 }
9229 /* else fall through */
9230 }
9231
9232 /* 0F 50 = MOVMSKPS - move 4 sign bits from 4 x F32 in xmm(E)
9233 to 4 lowest bits of ireg(G) */
9234 if (insn[0] == 0x0F && insn[1] == 0x50) {
9235 modrm = getIByte(delta+2);
9236 if (sz == 4 && epartIsReg(modrm)) {
9237 Int src;
9238 t0 = newTemp(Ity_I32);
9239 t1 = newTemp(Ity_I32);
9240 t2 = newTemp(Ity_I32);
9241 t3 = newTemp(Ity_I32);
9242 delta += 2+1;
9243 src = eregOfRM(modrm);
9244 assign( t0, binop( Iop_And32,
9245 binop(Iop_Shr32, getXMMRegLane32(src,0), mkU8(31)),
9246 mkU32(1) ));
9247 assign( t1, binop( Iop_And32,
9248 binop(Iop_Shr32, getXMMRegLane32(src,1), mkU8(30)),
9249 mkU32(2) ));
9250 assign( t2, binop( Iop_And32,
9251 binop(Iop_Shr32, getXMMRegLane32(src,2), mkU8(29)),
9252 mkU32(4) ));
9253 assign( t3, binop( Iop_And32,
9254 binop(Iop_Shr32, getXMMRegLane32(src,3), mkU8(28)),
9255 mkU32(8) ));
9256 putIReg(4, gregOfRM(modrm),
9257 binop(Iop_Or32,
9258 binop(Iop_Or32, mkexpr(t0), mkexpr(t1)),
9259 binop(Iop_Or32, mkexpr(t2), mkexpr(t3))
9260 )
9261 );
9262 DIP("movmskps %s,%s\n", nameXMMReg(src),
9263 nameIReg(4, gregOfRM(modrm)));
9264 goto decode_success;
9265 }
9266 /* else fall through */
9267 }
9268
9269 /* 0F 2B = MOVNTPS -- for us, just a plain SSE store. */
9270 /* 66 0F 2B = MOVNTPD -- for us, just a plain SSE store. */
9271 if (insn[0] == 0x0F && insn[1] == 0x2B) {
9272 modrm = getIByte(delta+2);
9273 if (!epartIsReg(modrm)) {
9274 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
9275 gen_SEGV_if_not_16_aligned( addr );
9276 storeLE( mkexpr(addr), getXMMReg(gregOfRM(modrm)) );
9277 DIP("movntp%s %s,%s\n", sz==2 ? "d" : "s",
9278 dis_buf,
9279 nameXMMReg(gregOfRM(modrm)));
9280 delta += 2+alen;
9281 goto decode_success;
9282 }
9283 /* else fall through */
9284 }
9285
9286 /* F3 0F 10 = MOVSS -- move 32 bits from E (mem or lo 1/4 xmm) to G
9287 (lo 1/4 xmm). If E is mem, upper 3/4 of G is zeroed out. */
9288 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x10) {
9289 vassert(sz == 4);
9290 modrm = getIByte(delta+3);
9291 if (epartIsReg(modrm)) {
9292 putXMMRegLane32( gregOfRM(modrm), 0,
9293 getXMMRegLane32( eregOfRM(modrm), 0 ));
9294 DIP("movss %s,%s\n", nameXMMReg(eregOfRM(modrm)),
9295 nameXMMReg(gregOfRM(modrm)));
9296 delta += 3+1;
9297 } else {
9298 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
9299 /* zero bits 127:64 */
9300 putXMMRegLane64( gregOfRM(modrm), 1, mkU64(0) );
9301 /* zero bits 63:32 */
9302 putXMMRegLane32( gregOfRM(modrm), 1, mkU32(0) );
9303 /* write bits 31:0 */
9304 putXMMRegLane32( gregOfRM(modrm), 0,
9305 loadLE(Ity_I32, mkexpr(addr)) );
9306 DIP("movss %s,%s\n", dis_buf,
9307 nameXMMReg(gregOfRM(modrm)));
9308 delta += 3+alen;
9309 }
9310 goto decode_success;
9311 }
9312
9313 /* F3 0F 11 = MOVSS -- move 32 bits from G (lo 1/4 xmm) to E (mem
9314 or lo 1/4 xmm). */
9315 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x11) {
9316 vassert(sz == 4);
9317 modrm = getIByte(delta+3);
9318 if (epartIsReg(modrm)) {
9319 /* fall through, we don't yet have a test case */
9320 } else {
9321 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
9322 storeLE( mkexpr(addr),
9323 getXMMRegLane32(gregOfRM(modrm), 0) );
9324 DIP("movss %s,%s\n", nameXMMReg(gregOfRM(modrm)),
9325 dis_buf);
9326 delta += 3+alen;
9327 goto decode_success;
9328 }
9329 }
9330
9331 /* 0F 59 = MULPS -- mul 32Fx4 from R/M to R */
9332 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x59) {
9333 delta = dis_SSE_E_to_G_all( sorb, delta+2, "mulps", Iop_Mul32Fx4 );
9334 goto decode_success;
9335 }
9336
9337 /* F3 0F 59 = MULSS -- mul 32F0x4 from R/M to R */
9338 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x59) {
9339 vassert(sz == 4);
9340 delta = dis_SSE_E_to_G_lo32( sorb, delta+3, "mulss", Iop_Mul32F0x4 );
9341 goto decode_success;
9342 }
9343
   /* 0F 56 = ORPS -- G = G or E */
9345 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x56) {
9346 delta = dis_SSE_E_to_G_all( sorb, delta+2, "orps", Iop_OrV128 );
9347 goto decode_success;
9348 }
9349
9350 /* 0F 53 = RCPPS -- approx reciprocal 32Fx4 from R/M to R */
9351 if (insn[0] == 0x0F && insn[1] == 0x53) {
9352 vassert(sz == 4);
9353 delta = dis_SSE_E_to_G_unary_all( sorb, delta+2,
9354 "rcpps", Iop_Recip32Fx4 );
9355 goto decode_success;
9356 }
9357
9358 /* F3 0F 53 = RCPSS -- approx reciprocal 32F0x4 from R/M to R */
9359 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x53) {
9360 vassert(sz == 4);
9361 delta = dis_SSE_E_to_G_unary_lo32( sorb, delta+3,
9362 "rcpss", Iop_Recip32F0x4 );
9363 goto decode_success;
9364 }
9365
9366 /* 0F 52 = RSQRTPS -- approx reciprocal sqrt 32Fx4 from R/M to R */
9367 if (insn[0] == 0x0F && insn[1] == 0x52) {
9368 vassert(sz == 4);
9369 delta = dis_SSE_E_to_G_unary_all( sorb, delta+2,
9370 "rsqrtps", Iop_RSqrt32Fx4 );
9371 goto decode_success;
9372 }
9373
9374 /* F3 0F 52 = RSQRTSS -- approx reciprocal sqrt 32F0x4 from R/M to R */
9375 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x52) {
9376 vassert(sz == 4);
9377 delta = dis_SSE_E_to_G_unary_lo32( sorb, delta+3,
9378 "rsqrtss", Iop_RSqrt32F0x4 );
9379 goto decode_success;
9380 }
9381
9382 /* 0F C6 /r ib = SHUFPS -- shuffle packed F32s */
9383 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xC6) {
9384 Int select;
9385 IRTemp sV, dV;
9386 IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
9387 sV = newTemp(Ity_V128);
9388 dV = newTemp(Ity_V128);
9389 s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;
9390 modrm = insn[2];
9391 assign( dV, getXMMReg(gregOfRM(modrm)) );
9392
9393 if (epartIsReg(modrm)) {
9394 assign( sV, getXMMReg(eregOfRM(modrm)) );
9395 select = (Int)insn[3];
9396 delta += 2+2;
9397 DIP("shufps $%d,%s,%s\n", select,
9398 nameXMMReg(eregOfRM(modrm)),
9399 nameXMMReg(gregOfRM(modrm)));
9400 } else {
9401 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
9402 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
9403 select = (Int)insn[2+alen];
9404 delta += 3+alen;
9405 DIP("shufps $%d,%s,%s\n", select,
9406 dis_buf,
9407 nameXMMReg(gregOfRM(modrm)));
9408 }
9409
9410 breakup128to32s( dV, &d3, &d2, &d1, &d0 );
9411 breakup128to32s( sV, &s3, &s2, &s1, &s0 );
9412
9413 # define SELD(n) ((n)==0 ? d0 : ((n)==1 ? d1 : ((n)==2 ? d2 : d3)))
9414 # define SELS(n) ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3)))
9415
9416 putXMMReg(
9417 gregOfRM(modrm),
9418 mk128from32s( SELS((select>>6)&3), SELS((select>>4)&3),
9419 SELD((select>>2)&3), SELD((select>>0)&3) )
9420 );
9421
9422 # undef SELD
9423 # undef SELS
9424
9425 goto decode_success;
9426 }
9427
9428 /* 0F 51 = SQRTPS -- approx sqrt 32Fx4 from R/M to R */
9429 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x51) {
9430 delta = dis_SSE_E_to_G_unary_all( sorb, delta+2,
9431 "sqrtps", Iop_Sqrt32Fx4 );
9432 goto decode_success;
9433 }
9434
9435 /* F3 0F 51 = SQRTSS -- approx sqrt 32F0x4 from R/M to R */
9436 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x51) {
9437 vassert(sz == 4);
9438 delta = dis_SSE_E_to_G_unary_lo32( sorb, delta+3,
9439 "sqrtss", Iop_Sqrt32F0x4 );
9440 goto decode_success;
9441 }
9442
9443 /* 0F AE /3 = STMXCSR m32 -- store %mxcsr */
9444 if (insn[0] == 0x0F && insn[1] == 0xAE
9445 && !epartIsReg(insn[2]) && gregOfRM(insn[2]) == 3) {
9446 modrm = getIByte(delta+2);
9447 vassert(sz == 4);
9448 vassert(!epartIsReg(modrm));
9449
9450 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
9451 delta += 2+alen;
9452
9453 /* Fake up a native SSE mxcsr word. The only thing it depends
9454 on is SSEROUND[1:0], so call a clean helper to cook it up.
9455 */
9456 /* UInt x86h_create_mxcsr ( UInt sseround ) */
9457 DIP("stmxcsr %s\n", dis_buf);
9458 storeLE( mkexpr(addr),
9459 mkIRExprCCall(
9460 Ity_I32, 0/*regp*/,
9461 "x86g_create_mxcsr", &x86g_create_mxcsr,
9462 mkIRExprVec_1( get_sse_roundingmode() )
9463 )
9464 );
9465 goto decode_success;
9466 }
9467
9468 /* 0F 5C = SUBPS -- sub 32Fx4 from R/M to R */
9469 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x5C) {
9470 delta = dis_SSE_E_to_G_all( sorb, delta+2, "subps", Iop_Sub32Fx4 );
9471 goto decode_success;
9472 }
9473
9474 /* F3 0F 5C = SUBSS -- sub 32F0x4 from R/M to R */
9475 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x5C) {
9476 vassert(sz == 4);
9477 delta = dis_SSE_E_to_G_lo32( sorb, delta+3, "subss", Iop_Sub32F0x4 );
9478 goto decode_success;
9479 }
9480
9481 /* 0F 15 = UNPCKHPS -- unpack and interleave high part F32s */
9482 /* 0F 14 = UNPCKLPS -- unpack and interleave low part F32s */
9483 /* These just appear to be special cases of SHUFPS */
9484 if (sz == 4 && insn[0] == 0x0F && (insn[1] == 0x15 || insn[1] == 0x14)) {
9485 IRTemp sV, dV;
9486 IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
9487 Bool hi = toBool(insn[1] == 0x15);
9488 sV = newTemp(Ity_V128);
9489 dV = newTemp(Ity_V128);
9490 s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;
9491 modrm = insn[2];
9492 assign( dV, getXMMReg(gregOfRM(modrm)) );
9493
9494 if (epartIsReg(modrm)) {
9495 assign( sV, getXMMReg(eregOfRM(modrm)) );
9496 delta += 2+1;
9497 DIP("unpck%sps %s,%s\n", hi ? "h" : "l",
9498 nameXMMReg(eregOfRM(modrm)),
9499 nameXMMReg(gregOfRM(modrm)));
9500 } else {
9501 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
9502 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
9503 delta += 2+alen;
9504 DIP("unpck%sps %s,%s\n", hi ? "h" : "l",
9505 dis_buf,
9506 nameXMMReg(gregOfRM(modrm)));
9507 }
9508
9509 breakup128to32s( dV, &d3, &d2, &d1, &d0 );
9510 breakup128to32s( sV, &s3, &s2, &s1, &s0 );
9511
9512 if (hi) {
9513 putXMMReg( gregOfRM(modrm), mk128from32s( s3, d3, s2, d2 ) );
9514 } else {
9515 putXMMReg( gregOfRM(modrm), mk128from32s( s1, d1, s0, d0 ) );
9516 }
9517
9518 goto decode_success;
9519 }
9520
   /* 0F 57 = XORPS -- G = G xor E */
9522 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x57) {
9523 delta = dis_SSE_E_to_G_all( sorb, delta+2, "xorps", Iop_XorV128 );
9524 goto decode_success;
9525 }
9526
9527 /* ---------------------------------------------------- */
9528 /* --- end of the SSE decoder. --- */
9529 /* ---------------------------------------------------- */
9530
9531 /* ---------------------------------------------------- */
9532 /* --- start of the SSE2 decoder. --- */
9533 /* ---------------------------------------------------- */
9534
9535 /* Skip parts of the decoder which don't apply given the stated
9536 guest subarchitecture. */
9537 if (0 == (archinfo->hwcaps & VEX_HWCAPS_X86_SSE2))
9538 goto after_sse_decoders; /* no SSE2 capabilities */
9539
9540 insn = (UChar*)&guest_code[delta];
9541
   /* 66 0F 58 = ADDPD -- add 64Fx2 from R/M to R */
9543 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x58) {
9544 delta = dis_SSE_E_to_G_all( sorb, delta+2, "addpd", Iop_Add64Fx2 );
9545 goto decode_success;
9546 }
9547
9548 /* F2 0F 58 = ADDSD -- add 64F0x2 from R/M to R */
9549 if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x58) {
9550 vassert(sz == 4);
9551 delta = dis_SSE_E_to_G_lo64( sorb, delta+3, "addsd", Iop_Add64F0x2 );
9552 goto decode_success;
9553 }
9554
9555 /* 66 0F 55 = ANDNPD -- G = (not G) and E */
9556 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x55) {
9557 delta = dis_SSE_E_to_G_all_invG( sorb, delta+2, "andnpd", Iop_AndV128 );
9558 goto decode_success;
9559 }
9560
9561 /* 66 0F 54 = ANDPD -- G = G and E */
9562 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x54) {
9563 delta = dis_SSE_E_to_G_all( sorb, delta+2, "andpd", Iop_AndV128 );
9564 goto decode_success;
9565 }
9566
9567 /* 66 0F C2 = CMPPD -- 64Fx2 comparison from R/M to R */
9568 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xC2) {
9569 delta = dis_SSEcmp_E_to_G( sorb, delta+2, "cmppd", True, 8 );
9570 goto decode_success;
9571 }
9572
9573 /* F2 0F C2 = CMPSD -- 64F0x2 comparison from R/M to R */
9574 if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0xC2) {
9575 vassert(sz == 4);
9576 delta = dis_SSEcmp_E_to_G( sorb, delta+3, "cmpsd", False, 8 );
9577 goto decode_success;
9578 }
9579
9580 /* 66 0F 2F = COMISD -- 64F0x2 comparison G,E, and set ZCP */
9581 /* 66 0F 2E = UCOMISD -- 64F0x2 comparison G,E, and set ZCP */
9582 if (sz == 2 && insn[0] == 0x0F && (insn[1] == 0x2F || insn[1] == 0x2E)) {
9583 IRTemp argL = newTemp(Ity_F64);
9584 IRTemp argR = newTemp(Ity_F64);
9585 modrm = getIByte(delta+2);
9586 if (epartIsReg(modrm)) {
9587 assign( argR, getXMMRegLane64F( eregOfRM(modrm), 0/*lowest lane*/ ) );
9588 delta += 2+1;
9589 DIP("[u]comisd %s,%s\n", nameXMMReg(eregOfRM(modrm)),
9590 nameXMMReg(gregOfRM(modrm)) );
9591 } else {
9592 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
9593 assign( argR, loadLE(Ity_F64, mkexpr(addr)) );
9594 delta += 2+alen;
9595 DIP("[u]comisd %s,%s\n", dis_buf,
9596 nameXMMReg(gregOfRM(modrm)) );
9597 }
9598 assign( argL, getXMMRegLane64F( gregOfRM(modrm), 0/*lowest lane*/ ) );
9599
9600 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(X86G_CC_OP_COPY) ));
9601 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) ));
9602 stmt( IRStmt_Put(
9603 OFFB_CC_DEP1,
9604 binop( Iop_And32,
9605 binop(Iop_CmpF64, mkexpr(argL), mkexpr(argR)),
9606 mkU32(0x45)
9607 )));
9608 /* Set NDEP even though it isn't used. This makes redundant-PUT
9609 elimination of previous stores to this field work better. */
9610 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
9611 goto decode_success;
9612 }
9613
9614 /* F3 0F E6 = CVTDQ2PD -- convert 2 x I32 in mem/lo half xmm to 2 x
9615 F64 in xmm(G) */
9616 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0xE6) {
9617 IRTemp arg64 = newTemp(Ity_I64);
9618 vassert(sz == 4);
9619
9620 modrm = getIByte(delta+3);
9621 if (epartIsReg(modrm)) {
9622 assign( arg64, getXMMRegLane64(eregOfRM(modrm), 0) );
9623 delta += 3+1;
9624 DIP("cvtdq2pd %s,%s\n", nameXMMReg(eregOfRM(modrm)),
9625 nameXMMReg(gregOfRM(modrm)));
9626 } else {
9627 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
9628 assign( arg64, loadLE(Ity_I64, mkexpr(addr)) );
9629 delta += 3+alen;
9630 DIP("cvtdq2pd %s,%s\n", dis_buf,
9631 nameXMMReg(gregOfRM(modrm)) );
9632 }
9633
9634 putXMMRegLane64F(
9635 gregOfRM(modrm), 0,
9636 unop(Iop_I32StoF64, unop(Iop_64to32, mkexpr(arg64)))
9637 );
9638
9639 putXMMRegLane64F(
9640 gregOfRM(modrm), 1,
9641 unop(Iop_I32StoF64, unop(Iop_64HIto32, mkexpr(arg64)))
9642 );
9643
9644 goto decode_success;
9645 }
9646
9647 /* 0F 5B = CVTDQ2PS -- convert 4 x I32 in mem/xmm to 4 x F32 in
9648 xmm(G) */
9649 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x5B) {
9650 IRTemp argV = newTemp(Ity_V128);
9651 IRTemp rmode = newTemp(Ity_I32);
9652
9653 modrm = getIByte(delta+2);
9654 if (epartIsReg(modrm)) {
9655 assign( argV, getXMMReg(eregOfRM(modrm)) );
9656 delta += 2+1;
9657 DIP("cvtdq2ps %s,%s\n", nameXMMReg(eregOfRM(modrm)),
9658 nameXMMReg(gregOfRM(modrm)));
9659 } else {
9660 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
9661 assign( argV, loadLE(Ity_V128, mkexpr(addr)) );
9662 delta += 2+alen;
9663 DIP("cvtdq2ps %s,%s\n", dis_buf,
9664 nameXMMReg(gregOfRM(modrm)) );
9665 }
9666
9667 assign( rmode, get_sse_roundingmode() );
9668 breakup128to32s( argV, &t3, &t2, &t1, &t0 );
9669
9670 # define CVT(_t) binop( Iop_F64toF32, \
9671 mkexpr(rmode), \
9672 unop(Iop_I32StoF64,mkexpr(_t)))
9673
9674 putXMMRegLane32F( gregOfRM(modrm), 3, CVT(t3) );
9675 putXMMRegLane32F( gregOfRM(modrm), 2, CVT(t2) );
9676 putXMMRegLane32F( gregOfRM(modrm), 1, CVT(t1) );
9677 putXMMRegLane32F( gregOfRM(modrm), 0, CVT(t0) );
9678
9679 # undef CVT
9680
9681 goto decode_success;
9682 }
9683
9684 /* F2 0F E6 = CVTPD2DQ -- convert 2 x F64 in mem/xmm to 2 x I32 in
9685 lo half xmm(G), and zero upper half */
9686 if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0xE6) {
9687 IRTemp argV = newTemp(Ity_V128);
9688 IRTemp rmode = newTemp(Ity_I32);
9689 vassert(sz == 4);
9690
9691 modrm = getIByte(delta+3);
9692 if (epartIsReg(modrm)) {
9693 assign( argV, getXMMReg(eregOfRM(modrm)) );
9694 delta += 3+1;
9695 DIP("cvtpd2dq %s,%s\n", nameXMMReg(eregOfRM(modrm)),
9696 nameXMMReg(gregOfRM(modrm)));
9697 } else {
9698 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
9699 assign( argV, loadLE(Ity_V128, mkexpr(addr)) );
9700 delta += 3+alen;
9701 DIP("cvtpd2dq %s,%s\n", dis_buf,
9702 nameXMMReg(gregOfRM(modrm)) );
9703 }
9704
9705 assign( rmode, get_sse_roundingmode() );
9706 t0 = newTemp(Ity_F64);
9707 t1 = newTemp(Ity_F64);
9708 assign( t0, unop(Iop_ReinterpI64asF64,
9709 unop(Iop_V128to64, mkexpr(argV))) );
9710 assign( t1, unop(Iop_ReinterpI64asF64,
9711 unop(Iop_V128HIto64, mkexpr(argV))) );
9712
9713 # define CVT(_t) binop( Iop_F64toI32S, \
9714 mkexpr(rmode), \
9715 mkexpr(_t) )
9716
9717 putXMMRegLane32( gregOfRM(modrm), 3, mkU32(0) );
9718 putXMMRegLane32( gregOfRM(modrm), 2, mkU32(0) );
9719 putXMMRegLane32( gregOfRM(modrm), 1, CVT(t1) );
9720 putXMMRegLane32( gregOfRM(modrm), 0, CVT(t0) );
9721
9722 # undef CVT
9723
9724 goto decode_success;
9725 }
9726
9727 /* 66 0F 2D = CVTPD2PI -- convert 2 x F64 in mem/xmm to 2 x
9728 I32 in mmx, according to prevailing SSE rounding mode */
9729 /* 66 0F 2C = CVTTPD2PI -- convert 2 x F64 in mem/xmm to 2 x
9730 I32 in mmx, rounding towards zero */
9731 if (sz == 2 && insn[0] == 0x0F && (insn[1] == 0x2D || insn[1] == 0x2C)) {
9732 IRTemp dst64 = newTemp(Ity_I64);
9733 IRTemp rmode = newTemp(Ity_I32);
9734 IRTemp f64lo = newTemp(Ity_F64);
9735 IRTemp f64hi = newTemp(Ity_F64);
9736 Bool r2zero = toBool(insn[1] == 0x2C);
9737
9738 do_MMX_preamble();
9739 modrm = getIByte(delta+2);
9740
9741 if (epartIsReg(modrm)) {
9742 delta += 2+1;
9743 assign(f64lo, getXMMRegLane64F(eregOfRM(modrm), 0));
9744 assign(f64hi, getXMMRegLane64F(eregOfRM(modrm), 1));
9745 DIP("cvt%spd2pi %s,%s\n", r2zero ? "t" : "",
9746 nameXMMReg(eregOfRM(modrm)),
9747 nameMMXReg(gregOfRM(modrm)));
9748 } else {
9749 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
9750 assign(f64lo, loadLE(Ity_F64, mkexpr(addr)));
9751 assign(f64hi, loadLE(Ity_F64, binop( Iop_Add32,
9752 mkexpr(addr),
9753 mkU32(8) )));
9754 delta += 2+alen;
9755 DIP("cvt%spf2pi %s,%s\n", r2zero ? "t" : "",
9756 dis_buf,
9757 nameMMXReg(gregOfRM(modrm)));
9758 }
9759
9760 if (r2zero) {
9761 assign(rmode, mkU32((UInt)Irrm_ZERO) );
9762 } else {
9763 assign( rmode, get_sse_roundingmode() );
9764 }
9765
9766 assign(
9767 dst64,
9768 binop( Iop_32HLto64,
9769 binop( Iop_F64toI32S, mkexpr(rmode), mkexpr(f64hi) ),
9770 binop( Iop_F64toI32S, mkexpr(rmode), mkexpr(f64lo) )
9771 )
9772 );
9773
9774 putMMXReg(gregOfRM(modrm), mkexpr(dst64));
9775 goto decode_success;
9776 }
9777
9778 /* 66 0F 5A = CVTPD2PS -- convert 2 x F64 in mem/xmm to 2 x F32 in
9779 lo half xmm(G), and zero upper half */
9780 /* Note, this is practically identical to CVTPD2DQ. It would have
9781 been nicer to merge them together, but the insn[] offsets differ
9782 by one. */
9783 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x5A) {
9784 IRTemp argV = newTemp(Ity_V128);
9785 IRTemp rmode = newTemp(Ity_I32);
9786
9787 modrm = getIByte(delta+2);
9788 if (epartIsReg(modrm)) {
9789 assign( argV, getXMMReg(eregOfRM(modrm)) );
9790 delta += 2+1;
9791 DIP("cvtpd2ps %s,%s\n", nameXMMReg(eregOfRM(modrm)),
9792 nameXMMReg(gregOfRM(modrm)));
9793 } else {
9794 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
9795 assign( argV, loadLE(Ity_V128, mkexpr(addr)) );
9796 delta += 2+alen;
9797 DIP("cvtpd2ps %s,%s\n", dis_buf,
9798 nameXMMReg(gregOfRM(modrm)) );
9799 }
9800
9801 assign( rmode, get_sse_roundingmode() );
9802 t0 = newTemp(Ity_F64);
9803 t1 = newTemp(Ity_F64);
9804 assign( t0, unop(Iop_ReinterpI64asF64,
9805 unop(Iop_V128to64, mkexpr(argV))) );
9806 assign( t1, unop(Iop_ReinterpI64asF64,
9807 unop(Iop_V128HIto64, mkexpr(argV))) );
9808
9809 # define CVT(_t) binop( Iop_F64toF32, \
9810 mkexpr(rmode), \
9811 mkexpr(_t) )
9812
9813 putXMMRegLane32( gregOfRM(modrm), 3, mkU32(0) );
9814 putXMMRegLane32( gregOfRM(modrm), 2, mkU32(0) );
9815 putXMMRegLane32F( gregOfRM(modrm), 1, CVT(t1) );
9816 putXMMRegLane32F( gregOfRM(modrm), 0, CVT(t0) );
9817
9818 # undef CVT
9819
9820 goto decode_success;
9821 }
9822
9823 /* 66 0F 2A = CVTPI2PD -- convert 2 x I32 in mem/mmx to 2 x F64 in
9824 xmm(G) */
9825 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x2A) {
9826 IRTemp arg64 = newTemp(Ity_I64);
9827
9828 modrm = getIByte(delta+2);
9829 if (epartIsReg(modrm)) {
9830 /* Only switch to MMX mode if the source is a MMX register.
9831 This is inconsistent with all other instructions which
9832 convert between XMM and (M64 or MMX), which always switch
9833 to MMX mode even if 64-bit operand is M64 and not MMX. At
9834 least, that's what the Intel docs seem to me to say.
9835 Fixes #210264. */
9836 do_MMX_preamble();
9837 assign( arg64, getMMXReg(eregOfRM(modrm)) );
9838 delta += 2+1;
9839 DIP("cvtpi2pd %s,%s\n", nameMMXReg(eregOfRM(modrm)),
9840 nameXMMReg(gregOfRM(modrm)));
9841 } else {
9842 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
9843 assign( arg64, loadLE(Ity_I64, mkexpr(addr)) );
9844 delta += 2+alen;
9845 DIP("cvtpi2pd %s,%s\n", dis_buf,
9846 nameXMMReg(gregOfRM(modrm)) );
9847 }
9848
9849 putXMMRegLane64F(
9850 gregOfRM(modrm), 0,
9851 unop(Iop_I32StoF64, unop(Iop_64to32, mkexpr(arg64)) )
9852 );
9853
9854 putXMMRegLane64F(
9855 gregOfRM(modrm), 1,
9856 unop(Iop_I32StoF64, unop(Iop_64HIto32, mkexpr(arg64)) )
9857 );
9858
9859 goto decode_success;
9860 }
9861
9862 /* 66 0F 5B = CVTPS2DQ -- convert 4 x F32 in mem/xmm to 4 x I32 in
9863 xmm(G) */
9864 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x5B) {
9865 IRTemp argV = newTemp(Ity_V128);
9866 IRTemp rmode = newTemp(Ity_I32);
9867
9868 modrm = getIByte(delta+2);
9869 if (epartIsReg(modrm)) {
9870 assign( argV, getXMMReg(eregOfRM(modrm)) );
9871 delta += 2+1;
9872 DIP("cvtps2dq %s,%s\n", nameXMMReg(eregOfRM(modrm)),
9873 nameXMMReg(gregOfRM(modrm)));
9874 } else {
9875 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
9876 assign( argV, loadLE(Ity_V128, mkexpr(addr)) );
9877 delta += 2+alen;
9878 DIP("cvtps2dq %s,%s\n", dis_buf,
9879 nameXMMReg(gregOfRM(modrm)) );
9880 }
9881
9882 assign( rmode, get_sse_roundingmode() );
9883 breakup128to32s( argV, &t3, &t2, &t1, &t0 );
9884
9885 /* This is less than ideal. If it turns out to be a performance
9886 bottleneck it can be improved. */
9887 # define CVT(_t) \
9888 binop( Iop_F64toI32S, \
9889 mkexpr(rmode), \
9890 unop( Iop_F32toF64, \
9891 unop( Iop_ReinterpI32asF32, mkexpr(_t))) )
9892
9893 putXMMRegLane32( gregOfRM(modrm), 3, CVT(t3) );
9894 putXMMRegLane32( gregOfRM(modrm), 2, CVT(t2) );
9895 putXMMRegLane32( gregOfRM(modrm), 1, CVT(t1) );
9896 putXMMRegLane32( gregOfRM(modrm), 0, CVT(t0) );
9897
9898 # undef CVT
9899
9900 goto decode_success;
9901 }
9902
9903 /* 0F 5A = CVTPS2PD -- convert 2 x F32 in low half mem/xmm to 2 x
9904 F64 in xmm(G). */
9905 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x5A) {
9906 IRTemp f32lo = newTemp(Ity_F32);
9907 IRTemp f32hi = newTemp(Ity_F32);
9908
9909 modrm = getIByte(delta+2);
9910 if (epartIsReg(modrm)) {
9911 assign( f32lo, getXMMRegLane32F(eregOfRM(modrm), 0) );
9912 assign( f32hi, getXMMRegLane32F(eregOfRM(modrm), 1) );
9913 delta += 2+1;
9914 DIP("cvtps2pd %s,%s\n", nameXMMReg(eregOfRM(modrm)),
9915 nameXMMReg(gregOfRM(modrm)));
9916 } else {
9917 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
9918 assign( f32lo, loadLE(Ity_F32, mkexpr(addr)) );
9919 assign( f32hi, loadLE(Ity_F32,
9920 binop(Iop_Add32,mkexpr(addr),mkU32(4))) );
9921 delta += 2+alen;
9922 DIP("cvtps2pd %s,%s\n", dis_buf,
9923 nameXMMReg(gregOfRM(modrm)) );
9924 }
9925
9926 putXMMRegLane64F( gregOfRM(modrm), 1,
9927 unop(Iop_F32toF64, mkexpr(f32hi)) );
9928 putXMMRegLane64F( gregOfRM(modrm), 0,
9929 unop(Iop_F32toF64, mkexpr(f32lo)) );
9930
9931 goto decode_success;
9932 }
9933
9934 /* F2 0F 2D = CVTSD2SI -- convert F64 in mem/low half xmm to
9935 I32 in ireg, according to prevailing SSE rounding mode */
9936 /* F2 0F 2C = CVTTSD2SI -- convert F64 in mem/low half xmm to
9937 I32 in ireg, rounding towards zero */
9938 if (insn[0] == 0xF2 && insn[1] == 0x0F
9939 && (insn[2] == 0x2D || insn[2] == 0x2C)) {
9940 IRTemp rmode = newTemp(Ity_I32);
9941 IRTemp f64lo = newTemp(Ity_F64);
9942 Bool r2zero = toBool(insn[2] == 0x2C);
9943 vassert(sz == 4);
9944
9945 modrm = getIByte(delta+3);
9946 if (epartIsReg(modrm)) {
9947 delta += 3+1;
9948 assign(f64lo, getXMMRegLane64F(eregOfRM(modrm), 0));
9949 DIP("cvt%ssd2si %s,%s\n", r2zero ? "t" : "",
9950 nameXMMReg(eregOfRM(modrm)),
9951 nameIReg(4, gregOfRM(modrm)));
9952 } else {
9953 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
9954 assign(f64lo, loadLE(Ity_F64, mkexpr(addr)));
9955 delta += 3+alen;
9956 DIP("cvt%ssd2si %s,%s\n", r2zero ? "t" : "",
9957 dis_buf,
9958 nameIReg(4, gregOfRM(modrm)));
9959 }
9960
9961 if (r2zero) {
9962 assign( rmode, mkU32((UInt)Irrm_ZERO) );
9963 } else {
9964 assign( rmode, get_sse_roundingmode() );
9965 }
9966
9967 putIReg(4, gregOfRM(modrm),
9968 binop( Iop_F64toI32S, mkexpr(rmode), mkexpr(f64lo)) );
9969
9970 goto decode_success;
9971 }
9972
9973 /* F2 0F 5A = CVTSD2SS -- convert F64 in mem/low half xmm to F32 in
9974 low 1/4 xmm(G), according to prevailing SSE rounding mode */
9975 if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x5A) {
9976 IRTemp rmode = newTemp(Ity_I32);
9977 IRTemp f64lo = newTemp(Ity_F64);
9978 vassert(sz == 4);
9979
9980 modrm = getIByte(delta+3);
9981 if (epartIsReg(modrm)) {
9982 delta += 3+1;
9983 assign(f64lo, getXMMRegLane64F(eregOfRM(modrm), 0));
9984 DIP("cvtsd2ss %s,%s\n", nameXMMReg(eregOfRM(modrm)),
9985 nameXMMReg(gregOfRM(modrm)));
9986 } else {
9987 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
9988 assign(f64lo, loadLE(Ity_F64, mkexpr(addr)));
9989 delta += 3+alen;
9990 DIP("cvtsd2ss %s,%s\n", dis_buf,
9991 nameXMMReg(gregOfRM(modrm)));
9992 }
9993
9994 assign( rmode, get_sse_roundingmode() );
9995 putXMMRegLane32F(
9996 gregOfRM(modrm), 0,
9997 binop( Iop_F64toF32, mkexpr(rmode), mkexpr(f64lo) )
9998 );
9999
10000 goto decode_success;
10001 }
10002
10003 /* F2 0F 2A = CVTSI2SD -- convert I32 in mem/ireg to F64 in low
10004 half xmm */
10005 if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x2A) {
10006 IRTemp arg32 = newTemp(Ity_I32);
10007 vassert(sz == 4);
10008
10009 modrm = getIByte(delta+3);
10010 if (epartIsReg(modrm)) {
10011 assign( arg32, getIReg(4, eregOfRM(modrm)) );
10012 delta += 3+1;
10013 DIP("cvtsi2sd %s,%s\n", nameIReg(4, eregOfRM(modrm)),
10014 nameXMMReg(gregOfRM(modrm)));
10015 } else {
10016 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
10017 assign( arg32, loadLE(Ity_I32, mkexpr(addr)) );
10018 delta += 3+alen;
10019 DIP("cvtsi2sd %s,%s\n", dis_buf,
10020 nameXMMReg(gregOfRM(modrm)) );
10021 }
10022
10023 putXMMRegLane64F(
10024 gregOfRM(modrm), 0,
10025 unop(Iop_I32StoF64, mkexpr(arg32)) );
10026
10027 goto decode_success;
10028 }
10029
10030 /* F3 0F 5A = CVTSS2SD -- convert F32 in mem/low 1/4 xmm to F64 in
10031 low half xmm(G) */
10032 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x5A) {
10033 IRTemp f32lo = newTemp(Ity_F32);
10034 vassert(sz == 4);
10035
10036 modrm = getIByte(delta+3);
10037 if (epartIsReg(modrm)) {
10038 delta += 3+1;
10039 assign(f32lo, getXMMRegLane32F(eregOfRM(modrm), 0));
10040 DIP("cvtss2sd %s,%s\n", nameXMMReg(eregOfRM(modrm)),
10041 nameXMMReg(gregOfRM(modrm)));
10042 } else {
10043 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
10044 assign(f32lo, loadLE(Ity_F32, mkexpr(addr)));
10045 delta += 3+alen;
10046 DIP("cvtss2sd %s,%s\n", dis_buf,
10047 nameXMMReg(gregOfRM(modrm)));
10048 }
10049
10050 putXMMRegLane64F( gregOfRM(modrm), 0,
10051 unop( Iop_F32toF64, mkexpr(f32lo) ) );
10052
10053 goto decode_success;
10054 }
10055
10056 /* 66 0F E6 = CVTTPD2DQ -- convert 2 x F64 in mem/xmm to 2 x I32 in
10057 lo half xmm(G), and zero upper half, rounding towards zero */
10058 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xE6) {
10059 IRTemp argV = newTemp(Ity_V128);
10060 IRTemp rmode = newTemp(Ity_I32);
10061
10062 modrm = getIByte(delta+2);
10063 if (epartIsReg(modrm)) {
10064 assign( argV, getXMMReg(eregOfRM(modrm)) );
10065 delta += 2+1;
10066 DIP("cvttpd2dq %s,%s\n", nameXMMReg(eregOfRM(modrm)),
10067 nameXMMReg(gregOfRM(modrm)));
10068 } else {
10069 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
10070 assign( argV, loadLE(Ity_V128, mkexpr(addr)) );
10071 delta += 2+alen;
10072 DIP("cvttpd2dq %s,%s\n", dis_buf,
10073 nameXMMReg(gregOfRM(modrm)) );
10074 }
10075
10076 assign( rmode, mkU32((UInt)Irrm_ZERO) );
10077
10078 t0 = newTemp(Ity_F64);
10079 t1 = newTemp(Ity_F64);
10080 assign( t0, unop(Iop_ReinterpI64asF64,
10081 unop(Iop_V128to64, mkexpr(argV))) );
10082 assign( t1, unop(Iop_ReinterpI64asF64,
10083 unop(Iop_V128HIto64, mkexpr(argV))) );
10084
10085 # define CVT(_t) binop( Iop_F64toI32S, \
10086 mkexpr(rmode), \
10087 mkexpr(_t) )
10088
10089 putXMMRegLane32( gregOfRM(modrm), 3, mkU32(0) );
10090 putXMMRegLane32( gregOfRM(modrm), 2, mkU32(0) );
10091 putXMMRegLane32( gregOfRM(modrm), 1, CVT(t1) );
10092 putXMMRegLane32( gregOfRM(modrm), 0, CVT(t0) );
10093
10094 # undef CVT
10095
10096 goto decode_success;
10097 }
10098
10099 /* F3 0F 5B = CVTTPS2DQ -- convert 4 x F32 in mem/xmm to 4 x I32 in
10100 xmm(G), rounding towards zero */
10101 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x5B) {
10102 IRTemp argV = newTemp(Ity_V128);
10103 IRTemp rmode = newTemp(Ity_I32);
10104 vassert(sz == 4);
10105
10106 modrm = getIByte(delta+3);
10107 if (epartIsReg(modrm)) {
10108 assign( argV, getXMMReg(eregOfRM(modrm)) );
10109 delta += 3+1;
10110 DIP("cvttps2dq %s,%s\n", nameXMMReg(eregOfRM(modrm)),
10111 nameXMMReg(gregOfRM(modrm)));
10112 } else {
10113 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
10114 assign( argV, loadLE(Ity_V128, mkexpr(addr)) );
10115 delta += 3+alen;
10116 DIP("cvttps2dq %s,%s\n", dis_buf,
10117 nameXMMReg(gregOfRM(modrm)) );
10118 }
10119
10120 assign( rmode, mkU32((UInt)Irrm_ZERO) );
10121 breakup128to32s( argV, &t3, &t2, &t1, &t0 );
10122
10123 /* This is less than ideal. If it turns out to be a performance
10124 bottleneck it can be improved. */
10125 # define CVT(_t) \
10126 binop( Iop_F64toI32S, \
10127 mkexpr(rmode), \
10128 unop( Iop_F32toF64, \
10129 unop( Iop_ReinterpI32asF32, mkexpr(_t))) )
10130
10131 putXMMRegLane32( gregOfRM(modrm), 3, CVT(t3) );
10132 putXMMRegLane32( gregOfRM(modrm), 2, CVT(t2) );
10133 putXMMRegLane32( gregOfRM(modrm), 1, CVT(t1) );
10134 putXMMRegLane32( gregOfRM(modrm), 0, CVT(t0) );
10135
10136 # undef CVT
10137
10138 goto decode_success;
10139 }
10140
10141 /* 66 0F 5E = DIVPD -- div 64Fx2 from R/M to R */
10142 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x5E) {
10143 delta = dis_SSE_E_to_G_all( sorb, delta+2, "divpd", Iop_Div64Fx2 );
10144 goto decode_success;
10145 }
10146
10147 /* F2 0F 5E = DIVSD -- div 64F0x2 from R/M to R */
10148 if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x5E) {
10149 vassert(sz == 4);
10150 delta = dis_SSE_E_to_G_lo64( sorb, delta+3, "divsd", Iop_Div64F0x2 );
10151 goto decode_success;
10152 }
10153
10154 /* 0F AE /5 = LFENCE -- flush pending operations to memory */
10155 /* 0F AE /6 = MFENCE -- flush pending operations to memory */
10156 if (insn[0] == 0x0F && insn[1] == 0xAE
10157 && epartIsReg(insn[2])
10158 && (gregOfRM(insn[2]) == 5 || gregOfRM(insn[2]) == 6)) {
10159 vassert(sz == 4);
10160 delta += 3;
10161 /* Insert a memory fence. It's sometimes important that these
10162 are carried through to the generated code. */
10163 stmt( IRStmt_MBE(Imbe_Fence) );
10164 DIP("%sfence\n", gregOfRM(insn[2])==5 ? "l" : "m");
10165 goto decode_success;
10166 }
10167
10168 /* 66 0F 5F = MAXPD -- max 64Fx2 from R/M to R */
10169 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x5F) {
10170 delta = dis_SSE_E_to_G_all( sorb, delta+2, "maxpd", Iop_Max64Fx2 );
10171 goto decode_success;
10172 }
10173
10174 /* F2 0F 5F = MAXSD -- max 64F0x2 from R/M to R */
10175 if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x5F) {
10176 vassert(sz == 4);
10177 delta = dis_SSE_E_to_G_lo64( sorb, delta+3, "maxsd", Iop_Max64F0x2 );
10178 goto decode_success;
10179 }
10180
10181 /* 66 0F 5D = MINPD -- min 64Fx2 from R/M to R */
10182 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x5D) {
10183 delta = dis_SSE_E_to_G_all( sorb, delta+2, "minpd", Iop_Min64Fx2 );
10184 goto decode_success;
10185 }
10186
10187 /* F2 0F 5D = MINSD -- min 64F0x2 from R/M to R */
10188 if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x5D) {
10189 vassert(sz == 4);
10190 delta = dis_SSE_E_to_G_lo64( sorb, delta+3, "minsd", Iop_Min64F0x2 );
10191 goto decode_success;
10192 }
10193
10194 /* 66 0F 28 = MOVAPD -- move from E (mem or xmm) to G (xmm). */
10195 /* 66 0F 10 = MOVUPD -- move from E (mem or xmm) to G (xmm). */
10196 /* 66 0F 6F = MOVDQA -- move from E (mem or xmm) to G (xmm). */
10197 if (sz == 2 && insn[0] == 0x0F
10198 && (insn[1] == 0x28 || insn[1] == 0x10 || insn[1] == 0x6F)) {
10199 const HChar* wot = insn[1]==0x28 ? "apd" :
10200 insn[1]==0x10 ? "upd" : "dqa";
10201 modrm = getIByte(delta+2);
10202 if (epartIsReg(modrm)) {
10203 putXMMReg( gregOfRM(modrm),
10204 getXMMReg( eregOfRM(modrm) ));
10205 DIP("mov%s %s,%s\n", wot, nameXMMReg(eregOfRM(modrm)),
10206 nameXMMReg(gregOfRM(modrm)));
10207 delta += 2+1;
10208 } else {
10209 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
10210 if (insn[1] == 0x28/*movapd*/ || insn[1] == 0x6F/*movdqa*/)
10211 gen_SEGV_if_not_16_aligned( addr );
10212 putXMMReg( gregOfRM(modrm),
10213 loadLE(Ity_V128, mkexpr(addr)) );
10214 DIP("mov%s %s,%s\n", wot, dis_buf,
10215 nameXMMReg(gregOfRM(modrm)));
10216 delta += 2+alen;
10217 }
10218 goto decode_success;
10219 }
10220
10221 /* 66 0F 29 = MOVAPD -- move from G (xmm) to E (mem or xmm). */
10222 /* 66 0F 11 = MOVUPD -- move from G (xmm) to E (mem or xmm). */
10223 if (sz == 2 && insn[0] == 0x0F
10224 && (insn[1] == 0x29 || insn[1] == 0x11)) {
10225 const HChar* wot = insn[1]==0x29 ? "apd" : "upd";
10226 modrm = getIByte(delta+2);
10227 if (epartIsReg(modrm)) {
10228 /* fall through; awaiting test case */
10229 } else {
10230 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
10231 if (insn[1] == 0x29/*movapd*/)
10232 gen_SEGV_if_not_16_aligned( addr );
10233 storeLE( mkexpr(addr), getXMMReg(gregOfRM(modrm)) );
10234 DIP("mov%s %s,%s\n", wot, nameXMMReg(gregOfRM(modrm)),
10235 dis_buf );
10236 delta += 2+alen;
10237 goto decode_success;
10238 }
10239 }
10240
10241 /* 66 0F 6E = MOVD from r/m32 to xmm, zeroing high 3/4 of xmm. */
10242 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x6E) {
10243 modrm = getIByte(delta+2);
10244 if (epartIsReg(modrm)) {
10245 delta += 2+1;
10246 putXMMReg(
10247 gregOfRM(modrm),
10248 unop( Iop_32UtoV128, getIReg(4, eregOfRM(modrm)) )
10249 );
10250 DIP("movd %s, %s\n",
10251 nameIReg(4,eregOfRM(modrm)), nameXMMReg(gregOfRM(modrm)));
10252 } else {
10253 addr = disAMode( &alen, sorb, delta+2, dis_buf );
10254 delta += 2+alen;
10255 putXMMReg(
10256 gregOfRM(modrm),
10257 unop( Iop_32UtoV128,loadLE(Ity_I32, mkexpr(addr)) )
10258 );
10259 DIP("movd %s, %s\n", dis_buf, nameXMMReg(gregOfRM(modrm)));
10260 }
10261 goto decode_success;
10262 }
10263
10264 /* 66 0F 7E = MOVD from xmm low 1/4 to r/m32. */
10265 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x7E) {
10266 modrm = getIByte(delta+2);
10267 if (epartIsReg(modrm)) {
10268 delta += 2+1;
10269 putIReg( 4, eregOfRM(modrm),
10270 getXMMRegLane32(gregOfRM(modrm), 0) );
10271 DIP("movd %s, %s\n",
10272 nameXMMReg(gregOfRM(modrm)), nameIReg(4,eregOfRM(modrm)));
10273 } else {
10274 addr = disAMode( &alen, sorb, delta+2, dis_buf );
10275 delta += 2+alen;
10276 storeLE( mkexpr(addr),
10277 getXMMRegLane32(gregOfRM(modrm), 0) );
10278 DIP("movd %s, %s\n", nameXMMReg(gregOfRM(modrm)), dis_buf);
10279 }
10280 goto decode_success;
10281 }
10282
10283 /* 66 0F 7F = MOVDQA -- move from G (xmm) to E (mem or xmm). */
10284 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x7F) {
10285 modrm = getIByte(delta+2);
10286 if (epartIsReg(modrm)) {
10287 delta += 2+1;
10288 putXMMReg( eregOfRM(modrm),
10289 getXMMReg(gregOfRM(modrm)) );
10290 DIP("movdqa %s, %s\n", nameXMMReg(gregOfRM(modrm)),
10291 nameXMMReg(eregOfRM(modrm)));
10292 } else {
10293 addr = disAMode( &alen, sorb, delta+2, dis_buf );
10294 delta += 2+alen;
10295 gen_SEGV_if_not_16_aligned( addr );
10296 storeLE( mkexpr(addr), getXMMReg(gregOfRM(modrm)) );
10297 DIP("movdqa %s, %s\n", nameXMMReg(gregOfRM(modrm)), dis_buf);
10298 }
10299 goto decode_success;
10300 }
10301
10302 /* F3 0F 6F = MOVDQU -- move from E (mem or xmm) to G (xmm). */
10303 /* Unfortunately can't simply use the MOVDQA case since the
10304 prefix lengths are different (66 vs F3) */
10305 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x6F) {
10306 vassert(sz == 4);
10307 modrm = getIByte(delta+3);
10308 if (epartIsReg(modrm)) {
10309 putXMMReg( gregOfRM(modrm),
10310 getXMMReg( eregOfRM(modrm) ));
10311 DIP("movdqu %s,%s\n", nameXMMReg(eregOfRM(modrm)),
10312 nameXMMReg(gregOfRM(modrm)));
10313 delta += 3+1;
10314 } else {
10315 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
10316 putXMMReg( gregOfRM(modrm),
10317 loadLE(Ity_V128, mkexpr(addr)) );
10318 DIP("movdqu %s,%s\n", dis_buf,
10319 nameXMMReg(gregOfRM(modrm)));
10320 delta += 3+alen;
10321 }
10322 goto decode_success;
10323 }
10324
10325 /* F3 0F 7F = MOVDQU -- move from G (xmm) to E (mem or xmm). */
10326 /* Unfortunately can't simply use the MOVDQA case since the
10327 prefix lengths are different (66 vs F3) */
10328 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x7F) {
10329 vassert(sz == 4);
10330 modrm = getIByte(delta+3);
10331 if (epartIsReg(modrm)) {
10332 delta += 3+1;
10333 putXMMReg( eregOfRM(modrm),
10334 getXMMReg(gregOfRM(modrm)) );
10335 DIP("movdqu %s, %s\n", nameXMMReg(gregOfRM(modrm)),
10336 nameXMMReg(eregOfRM(modrm)));
10337 } else {
10338 addr = disAMode( &alen, sorb, delta+3, dis_buf );
10339 delta += 3+alen;
10340 storeLE( mkexpr(addr), getXMMReg(gregOfRM(modrm)) );
10341 DIP("movdqu %s, %s\n", nameXMMReg(gregOfRM(modrm)), dis_buf);
10342 }
10343 goto decode_success;
10344 }
10345
10346 /* F2 0F D6 = MOVDQ2Q -- move from E (lo half xmm, not mem) to G (mmx). */
10347 if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0xD6) {
10348 vassert(sz == 4);
10349 modrm = getIByte(delta+3);
10350 if (epartIsReg(modrm)) {
10351 do_MMX_preamble();
10352 putMMXReg( gregOfRM(modrm),
10353 getXMMRegLane64( eregOfRM(modrm), 0 ));
10354 DIP("movdq2q %s,%s\n", nameXMMReg(eregOfRM(modrm)),
10355 nameMMXReg(gregOfRM(modrm)));
10356 delta += 3+1;
10357 goto decode_success;
10358 } else {
10359 /* fall through, apparently no mem case for this insn */
10360 }
10361 }
10362
10363 /* 66 0F 16 = MOVHPD -- move from mem to high half of XMM. */
/* These seem identical to MOVHPS. This instruction encoding is
   completely crazy. */
10366 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x16) {
10367 modrm = getIByte(delta+2);
10368 if (epartIsReg(modrm)) {
10369 /* fall through; apparently reg-reg is not possible */
10370 } else {
10371 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
10372 delta += 2+alen;
10373 putXMMRegLane64( gregOfRM(modrm), 1/*upper lane*/,
10374 loadLE(Ity_I64, mkexpr(addr)) );
10375 DIP("movhpd %s,%s\n", dis_buf,
10376 nameXMMReg( gregOfRM(modrm) ));
10377 goto decode_success;
10378 }
10379 }
10380
10381 /* 66 0F 17 = MOVHPD -- move from high half of XMM to mem. */
10382 /* Again, this seems identical to MOVHPS. */
10383 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x17) {
10384 if (!epartIsReg(insn[2])) {
10385 delta += 2;
10386 addr = disAMode ( &alen, sorb, delta, dis_buf );
10387 delta += alen;
10388 storeLE( mkexpr(addr),
10389 getXMMRegLane64( gregOfRM(insn[2]),
10390 1/*upper lane*/ ) );
10391 DIP("movhpd %s,%s\n", nameXMMReg( gregOfRM(insn[2]) ),
10392 dis_buf);
10393 goto decode_success;
10394 }
10395 /* else fall through */
10396 }
10397
10398 /* 66 0F 12 = MOVLPD -- move from mem to low half of XMM. */
10399 /* Identical to MOVLPS ? */
10400 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x12) {
10401 modrm = getIByte(delta+2);
10402 if (epartIsReg(modrm)) {
10403 /* fall through; apparently reg-reg is not possible */
10404 } else {
10405 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
10406 delta += 2+alen;
10407 putXMMRegLane64( gregOfRM(modrm), 0/*lower lane*/,
10408 loadLE(Ity_I64, mkexpr(addr)) );
10409 DIP("movlpd %s, %s\n",
10410 dis_buf, nameXMMReg( gregOfRM(modrm) ));
10411 goto decode_success;
10412 }
10413 }
10414
10415 /* 66 0F 13 = MOVLPD -- move from low half of XMM to mem. */
10416 /* Identical to MOVLPS ? */
10417 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x13) {
10418 if (!epartIsReg(insn[2])) {
10419 delta += 2;
10420 addr = disAMode ( &alen, sorb, delta, dis_buf );
10421 delta += alen;
10422 storeLE( mkexpr(addr),
10423 getXMMRegLane64( gregOfRM(insn[2]),
10424 0/*lower lane*/ ) );
10425 DIP("movlpd %s, %s\n", nameXMMReg( gregOfRM(insn[2]) ),
10426 dis_buf);
10427 goto decode_success;
10428 }
10429 /* else fall through */
10430 }
10431
10432 /* 66 0F 50 = MOVMSKPD - move 2 sign bits from 2 x F64 in xmm(E) to
10433 2 lowest bits of ireg(G) */
10434 if (insn[0] == 0x0F && insn[1] == 0x50) {
10435 modrm = getIByte(delta+2);
10436 if (sz == 2 && epartIsReg(modrm)) {
10437 Int src;
10438 t0 = newTemp(Ity_I32);
10439 t1 = newTemp(Ity_I32);
10440 delta += 2+1;
10441 src = eregOfRM(modrm);
10442 assign( t0, binop( Iop_And32,
10443 binop(Iop_Shr32, getXMMRegLane32(src,1), mkU8(31)),
10444 mkU32(1) ));
10445 assign( t1, binop( Iop_And32,
10446 binop(Iop_Shr32, getXMMRegLane32(src,3), mkU8(30)),
10447 mkU32(2) ));
10448 putIReg(4, gregOfRM(modrm),
10449 binop(Iop_Or32, mkexpr(t0), mkexpr(t1))
10450 );
10451 DIP("movmskpd %s,%s\n", nameXMMReg(src),
10452 nameIReg(4, gregOfRM(modrm)));
10453 goto decode_success;
10454 }
10455 /* else fall through */
10456 }
10457
10458 /* 66 0F F7 = MASKMOVDQU -- store selected bytes of double quadword */
10459 if (insn[0] == 0x0F && insn[1] == 0xF7) {
10460 modrm = getIByte(delta+2);
10461 if (sz == 2 && epartIsReg(modrm)) {
10462 IRTemp regD = newTemp(Ity_V128);
10463 IRTemp mask = newTemp(Ity_V128);
10464 IRTemp olddata = newTemp(Ity_V128);
10465 IRTemp newdata = newTemp(Ity_V128);
10466 addr = newTemp(Ity_I32);
10467
10468 assign( addr, handleSegOverride( sorb, getIReg(4, R_EDI) ));
10469 assign( regD, getXMMReg( gregOfRM(modrm) ));
10470
10471 /* Unfortunately can't do the obvious thing with SarN8x16
10472 here since that can't be re-emitted as SSE2 code - no such
10473 insn. */
10474 assign(
10475 mask,
10476 binop(Iop_64HLtoV128,
10477 binop(Iop_SarN8x8,
10478 getXMMRegLane64( eregOfRM(modrm), 1 ),
10479 mkU8(7) ),
10480 binop(Iop_SarN8x8,
10481 getXMMRegLane64( eregOfRM(modrm), 0 ),
10482 mkU8(7) ) ));
10483 assign( olddata, loadLE( Ity_V128, mkexpr(addr) ));
10484 assign( newdata,
10485 binop(Iop_OrV128,
10486 binop(Iop_AndV128,
10487 mkexpr(regD),
10488 mkexpr(mask) ),
10489 binop(Iop_AndV128,
10490 mkexpr(olddata),
10491 unop(Iop_NotV128, mkexpr(mask)))) );
10492 storeLE( mkexpr(addr), mkexpr(newdata) );
10493
10494 delta += 2+1;
10495 DIP("maskmovdqu %s,%s\n", nameXMMReg( eregOfRM(modrm) ),
10496 nameXMMReg( gregOfRM(modrm) ) );
10497 goto decode_success;
10498 }
10499 /* else fall through */
10500 }
10501
10502 /* 66 0F E7 = MOVNTDQ -- for us, just a plain SSE store. */
10503 if (insn[0] == 0x0F && insn[1] == 0xE7) {
10504 modrm = getIByte(delta+2);
10505 if (sz == 2 && !epartIsReg(modrm)) {
10506 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
10507 gen_SEGV_if_not_16_aligned( addr );
10508 storeLE( mkexpr(addr), getXMMReg(gregOfRM(modrm)) );
10509 DIP("movntdq %s,%s\n", dis_buf,
10510 nameXMMReg(gregOfRM(modrm)));
10511 delta += 2+alen;
10512 goto decode_success;
10513 }
10514 /* else fall through */
10515 }
10516
10517 /* 0F C3 = MOVNTI -- for us, just a plain ireg store. */
10518 if (insn[0] == 0x0F && insn[1] == 0xC3) {
10519 vassert(sz == 4);
10520 modrm = getIByte(delta+2);
10521 if (!epartIsReg(modrm)) {
10522 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
10523 storeLE( mkexpr(addr), getIReg(4, gregOfRM(modrm)) );
10524 DIP("movnti %s,%s\n", dis_buf,
10525 nameIReg(4, gregOfRM(modrm)));
10526 delta += 2+alen;
10527 goto decode_success;
10528 }
10529 /* else fall through */
10530 }
10531
10532 /* 66 0F D6 = MOVQ -- move 64 bits from G (lo half xmm) to E (mem
10533 or lo half xmm). */
10534 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xD6) {
10535 modrm = getIByte(delta+2);
10536 if (epartIsReg(modrm)) {
10537 /* fall through, awaiting test case */
10538 /* dst: lo half copied, hi half zeroed */
10539 } else {
10540 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
10541 storeLE( mkexpr(addr),
10542 getXMMRegLane64( gregOfRM(modrm), 0 ));
10543 DIP("movq %s,%s\n", nameXMMReg(gregOfRM(modrm)), dis_buf );
10544 delta += 2+alen;
10545 goto decode_success;
10546 }
10547 }
10548
10549 /* F3 0F D6 = MOVQ2DQ -- move from E (mmx) to G (lo half xmm, zero
10550 hi half). */
10551 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0xD6) {
10552 vassert(sz == 4);
10553 modrm = getIByte(delta+3);
10554 if (epartIsReg(modrm)) {
10555 do_MMX_preamble();
10556 putXMMReg( gregOfRM(modrm),
10557 unop(Iop_64UtoV128, getMMXReg( eregOfRM(modrm) )) );
10558 DIP("movq2dq %s,%s\n", nameMMXReg(eregOfRM(modrm)),
10559 nameXMMReg(gregOfRM(modrm)));
10560 delta += 3+1;
10561 goto decode_success;
10562 } else {
10563 /* fall through, apparently no mem case for this insn */
10564 }
10565 }
10566
10567 /* F3 0F 7E = MOVQ -- move 64 bits from E (mem or lo half xmm) to
10568 G (lo half xmm). Upper half of G is zeroed out. */
10569 /* F2 0F 10 = MOVSD -- move 64 bits from E (mem or lo half xmm) to
10570 G (lo half xmm). If E is mem, upper half of G is zeroed out.
10571 If E is reg, upper half of G is unchanged. */
10572 if ((insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x10)
10573 || (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x7E)) {
10574 vassert(sz == 4);
10575 modrm = getIByte(delta+3);
10576 if (epartIsReg(modrm)) {
10577 putXMMRegLane64( gregOfRM(modrm), 0,
10578 getXMMRegLane64( eregOfRM(modrm), 0 ));
10579 if (insn[0] == 0xF3/*MOVQ*/) {
10580 /* zero bits 127:64 */
10581 putXMMRegLane64( gregOfRM(modrm), 1, mkU64(0) );
10582 }
10583 DIP("movsd %s,%s\n", nameXMMReg(eregOfRM(modrm)),
10584 nameXMMReg(gregOfRM(modrm)));
10585 delta += 3+1;
10586 } else {
10587 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
10588 /* zero bits 127:64 */
10589 putXMMRegLane64( gregOfRM(modrm), 1, mkU64(0) );
10590 /* write bits 63:0 */
10591 putXMMRegLane64( gregOfRM(modrm), 0,
10592 loadLE(Ity_I64, mkexpr(addr)) );
10593 DIP("movsd %s,%s\n", dis_buf,
10594 nameXMMReg(gregOfRM(modrm)));
10595 delta += 3+alen;
10596 }
10597 goto decode_success;
10598 }
10599
10600 /* F2 0F 11 = MOVSD -- move 64 bits from G (lo half xmm) to E (mem
10601 or lo half xmm). */
10602 if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x11) {
10603 vassert(sz == 4);
10604 modrm = getIByte(delta+3);
10605 if (epartIsReg(modrm)) {
10606 putXMMRegLane64( eregOfRM(modrm), 0,
10607 getXMMRegLane64( gregOfRM(modrm), 0 ));
10608 DIP("movsd %s,%s\n", nameXMMReg(gregOfRM(modrm)),
10609 nameXMMReg(eregOfRM(modrm)));
10610 delta += 3+1;
10611 } else {
10612 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
10613 storeLE( mkexpr(addr),
10614 getXMMRegLane64(gregOfRM(modrm), 0) );
10615 DIP("movsd %s,%s\n", nameXMMReg(gregOfRM(modrm)),
10616 dis_buf);
10617 delta += 3+alen;
10618 }
10619 goto decode_success;
10620 }
10621
10622 /* 66 0F 59 = MULPD -- mul 64Fx2 from R/M to R */
10623 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x59) {
10624 delta = dis_SSE_E_to_G_all( sorb, delta+2, "mulpd", Iop_Mul64Fx2 );
10625 goto decode_success;
10626 }
10627
10628 /* F2 0F 59 = MULSD -- mul 64F0x2 from R/M to R */
10629 if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x59) {
10630 vassert(sz == 4);
10631 delta = dis_SSE_E_to_G_lo64( sorb, delta+3, "mulsd", Iop_Mul64F0x2 );
10632 goto decode_success;
10633 }
10634
/* 66 0F 56 = ORPD -- G = G or E */
10636 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x56) {
10637 delta = dis_SSE_E_to_G_all( sorb, delta+2, "orpd", Iop_OrV128 );
10638 goto decode_success;
10639 }
10640
10641 /* 66 0F C6 /r ib = SHUFPD -- shuffle packed F64s */
10642 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xC6) {
10643 Int select;
10644 IRTemp sV = newTemp(Ity_V128);
10645 IRTemp dV = newTemp(Ity_V128);
10646 IRTemp s1 = newTemp(Ity_I64);
10647 IRTemp s0 = newTemp(Ity_I64);
10648 IRTemp d1 = newTemp(Ity_I64);
10649 IRTemp d0 = newTemp(Ity_I64);
10650
10651 modrm = insn[2];
10652 assign( dV, getXMMReg(gregOfRM(modrm)) );
10653
10654 if (epartIsReg(modrm)) {
10655 assign( sV, getXMMReg(eregOfRM(modrm)) );
10656 select = (Int)insn[3];
10657 delta += 2+2;
10658 DIP("shufpd $%d,%s,%s\n", select,
10659 nameXMMReg(eregOfRM(modrm)),
10660 nameXMMReg(gregOfRM(modrm)));
10661 } else {
10662 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
10663 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
10664 select = (Int)insn[2+alen];
10665 delta += 3+alen;
10666 DIP("shufpd $%d,%s,%s\n", select,
10667 dis_buf,
10668 nameXMMReg(gregOfRM(modrm)));
10669 }
10670
10671 assign( d1, unop(Iop_V128HIto64, mkexpr(dV)) );
10672 assign( d0, unop(Iop_V128to64, mkexpr(dV)) );
10673 assign( s1, unop(Iop_V128HIto64, mkexpr(sV)) );
10674 assign( s0, unop(Iop_V128to64, mkexpr(sV)) );
10675
10676 # define SELD(n) mkexpr((n)==0 ? d0 : d1)
10677 # define SELS(n) mkexpr((n)==0 ? s0 : s1)
10678
10679 putXMMReg(
10680 gregOfRM(modrm),
10681 binop(Iop_64HLtoV128, SELS((select>>1)&1), SELD((select>>0)&1) )
10682 );
10683
10684 # undef SELD
10685 # undef SELS
10686
10687 goto decode_success;
10688 }
10689
10690 /* 66 0F 51 = SQRTPD -- approx sqrt 64Fx2 from R/M to R */
10691 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x51) {
10692 delta = dis_SSE_E_to_G_unary_all( sorb, delta+2,
10693 "sqrtpd", Iop_Sqrt64Fx2 );
10694 goto decode_success;
10695 }
10696
10697 /* F2 0F 51 = SQRTSD -- approx sqrt 64F0x2 from R/M to R */
10698 if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x51) {
10699 vassert(sz == 4);
10700 delta = dis_SSE_E_to_G_unary_lo64( sorb, delta+3,
10701 "sqrtsd", Iop_Sqrt64F0x2 );
10702 goto decode_success;
10703 }
10704
10705 /* 66 0F 5C = SUBPD -- sub 64Fx2 from R/M to R */
10706 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x5C) {
10707 delta = dis_SSE_E_to_G_all( sorb, delta+2, "subpd", Iop_Sub64Fx2 );
10708 goto decode_success;
10709 }
10710
10711 /* F2 0F 5C = SUBSD -- sub 64F0x2 from R/M to R */
10712 if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x5C) {
10713 vassert(sz == 4);
10714 delta = dis_SSE_E_to_G_lo64( sorb, delta+3, "subsd", Iop_Sub64F0x2 );
10715 goto decode_success;
10716 }
10717
10718 /* 66 0F 15 = UNPCKHPD -- unpack and interleave high part F64s */
10719 /* 66 0F 14 = UNPCKLPD -- unpack and interleave low part F64s */
/* These just appear to be special cases of SHUFPD */
10721 if (sz == 2 && insn[0] == 0x0F && (insn[1] == 0x15 || insn[1] == 0x14)) {
10722 IRTemp s1 = newTemp(Ity_I64);
10723 IRTemp s0 = newTemp(Ity_I64);
10724 IRTemp d1 = newTemp(Ity_I64);
10725 IRTemp d0 = newTemp(Ity_I64);
10726 IRTemp sV = newTemp(Ity_V128);
10727 IRTemp dV = newTemp(Ity_V128);
10728 Bool hi = toBool(insn[1] == 0x15);
10729
10730 modrm = insn[2];
10731 assign( dV, getXMMReg(gregOfRM(modrm)) );
10732
10733 if (epartIsReg(modrm)) {
10734 assign( sV, getXMMReg(eregOfRM(modrm)) );
10735 delta += 2+1;
10736 DIP("unpck%sps %s,%s\n", hi ? "h" : "l",
10737 nameXMMReg(eregOfRM(modrm)),
10738 nameXMMReg(gregOfRM(modrm)));
10739 } else {
10740 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
10741 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
10742 delta += 2+alen;
10743 DIP("unpck%sps %s,%s\n", hi ? "h" : "l",
10744 dis_buf,
10745 nameXMMReg(gregOfRM(modrm)));
10746 }
10747
10748 assign( d1, unop(Iop_V128HIto64, mkexpr(dV)) );
10749 assign( d0, unop(Iop_V128to64, mkexpr(dV)) );
10750 assign( s1, unop(Iop_V128HIto64, mkexpr(sV)) );
10751 assign( s0, unop(Iop_V128to64, mkexpr(sV)) );
10752
10753 if (hi) {
10754 putXMMReg( gregOfRM(modrm),
10755 binop(Iop_64HLtoV128, mkexpr(s1), mkexpr(d1)) );
10756 } else {
10757 putXMMReg( gregOfRM(modrm),
10758 binop(Iop_64HLtoV128, mkexpr(s0), mkexpr(d0)) );
10759 }
10760
10761 goto decode_success;
10762 }
10763
/* 66 0F 57 = XORPD -- G = G xor E */
10765 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x57) {
10766 delta = dis_SSE_E_to_G_all( sorb, delta+2, "xorpd", Iop_XorV128 );
10767 goto decode_success;
10768 }
10769
10770 /* 66 0F 6B = PACKSSDW */
10771 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x6B) {
10772 delta = dis_SSEint_E_to_G( sorb, delta+2,
10773 "packssdw",
10774 Iop_QNarrowBin32Sto16Sx8, True );
10775 goto decode_success;
10776 }
10777
10778 /* 66 0F 63 = PACKSSWB */
10779 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x63) {
10780 delta = dis_SSEint_E_to_G( sorb, delta+2,
10781 "packsswb",
10782 Iop_QNarrowBin16Sto8Sx16, True );
10783 goto decode_success;
10784 }
10785
10786 /* 66 0F 67 = PACKUSWB */
10787 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x67) {
10788 delta = dis_SSEint_E_to_G( sorb, delta+2,
10789 "packuswb",
10790 Iop_QNarrowBin16Sto8Ux16, True );
10791 goto decode_success;
10792 }
10793
10794 /* 66 0F FC = PADDB */
10795 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xFC) {
10796 delta = dis_SSEint_E_to_G( sorb, delta+2,
10797 "paddb", Iop_Add8x16, False );
10798 goto decode_success;
10799 }
10800
10801 /* 66 0F FE = PADDD */
10802 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xFE) {
10803 delta = dis_SSEint_E_to_G( sorb, delta+2,
10804 "paddd", Iop_Add32x4, False );
10805 goto decode_success;
10806 }
10807
10808 /* ***--- this is an MMX class insn introduced in SSE2 ---*** */
10809 /* 0F D4 = PADDQ -- add 64x1 */
10810 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xD4) {
10811 do_MMX_preamble();
10812 delta = dis_MMXop_regmem_to_reg (
10813 sorb, delta+2, insn[1], "paddq", False );
10814 goto decode_success;
10815 }
10816
10817 /* 66 0F D4 = PADDQ */
10818 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xD4) {
10819 delta = dis_SSEint_E_to_G( sorb, delta+2,
10820 "paddq", Iop_Add64x2, False );
10821 goto decode_success;
10822 }
10823
10824 /* 66 0F FD = PADDW */
10825 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xFD) {
10826 delta = dis_SSEint_E_to_G( sorb, delta+2,
10827 "paddw", Iop_Add16x8, False );
10828 goto decode_success;
10829 }
10830
10831 /* 66 0F EC = PADDSB */
10832 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xEC) {
10833 delta = dis_SSEint_E_to_G( sorb, delta+2,
10834 "paddsb", Iop_QAdd8Sx16, False );
10835 goto decode_success;
10836 }
10837
10838 /* 66 0F ED = PADDSW */
10839 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xED) {
10840 delta = dis_SSEint_E_to_G( sorb, delta+2,
10841 "paddsw", Iop_QAdd16Sx8, False );
10842 goto decode_success;
10843 }
10844
10845 /* 66 0F DC = PADDUSB */
10846 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xDC) {
10847 delta = dis_SSEint_E_to_G( sorb, delta+2,
10848 "paddusb", Iop_QAdd8Ux16, False );
10849 goto decode_success;
10850 }
10851
10852 /* 66 0F DD = PADDUSW */
10853 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xDD) {
10854 delta = dis_SSEint_E_to_G( sorb, delta+2,
10855 "paddusw", Iop_QAdd16Ux8, False );
10856 goto decode_success;
10857 }
10858
10859 /* 66 0F DB = PAND */
10860 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xDB) {
10861 delta = dis_SSE_E_to_G_all( sorb, delta+2, "pand", Iop_AndV128 );
10862 goto decode_success;
10863 }
10864
10865 /* 66 0F DF = PANDN */
10866 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xDF) {
10867 delta = dis_SSE_E_to_G_all_invG( sorb, delta+2, "pandn", Iop_AndV128 );
10868 goto decode_success;
10869 }
10870
10871 /* 66 0F E0 = PAVGB */
10872 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xE0) {
10873 delta = dis_SSEint_E_to_G( sorb, delta+2,
10874 "pavgb", Iop_Avg8Ux16, False );
10875 goto decode_success;
10876 }
10877
10878 /* 66 0F E3 = PAVGW */
10879 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xE3) {
10880 delta = dis_SSEint_E_to_G( sorb, delta+2,
10881 "pavgw", Iop_Avg16Ux8, False );
10882 goto decode_success;
10883 }
10884
10885 /* 66 0F 74 = PCMPEQB */
10886 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x74) {
10887 delta = dis_SSEint_E_to_G( sorb, delta+2,
10888 "pcmpeqb", Iop_CmpEQ8x16, False );
10889 goto decode_success;
10890 }
10891
10892 /* 66 0F 76 = PCMPEQD */
10893 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x76) {
10894 delta = dis_SSEint_E_to_G( sorb, delta+2,
10895 "pcmpeqd", Iop_CmpEQ32x4, False );
10896 goto decode_success;
10897 }
10898
10899 /* 66 0F 75 = PCMPEQW */
10900 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x75) {
10901 delta = dis_SSEint_E_to_G( sorb, delta+2,
10902 "pcmpeqw", Iop_CmpEQ16x8, False );
10903 goto decode_success;
10904 }
10905
10906 /* 66 0F 64 = PCMPGTB */
10907 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x64) {
10908 delta = dis_SSEint_E_to_G( sorb, delta+2,
10909 "pcmpgtb", Iop_CmpGT8Sx16, False );
10910 goto decode_success;
10911 }
10912
10913 /* 66 0F 66 = PCMPGTD */
10914 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x66) {
10915 delta = dis_SSEint_E_to_G( sorb, delta+2,
10916 "pcmpgtd", Iop_CmpGT32Sx4, False );
10917 goto decode_success;
10918 }
10919
10920 /* 66 0F 65 = PCMPGTW */
10921 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x65) {
10922 delta = dis_SSEint_E_to_G( sorb, delta+2,
10923 "pcmpgtw", Iop_CmpGT16Sx8, False );
10924 goto decode_success;
10925 }
10926
10927 /* 66 0F C5 = PEXTRW -- extract 16-bit field from xmm(E) and put
10928 zero-extend of it in ireg(G). */
10929 if (insn[0] == 0x0F && insn[1] == 0xC5) {
10930 modrm = insn[2];
10931 if (sz == 2 && epartIsReg(modrm)) {
10932 t5 = newTemp(Ity_V128);
10933 t4 = newTemp(Ity_I16);
10934 assign(t5, getXMMReg(eregOfRM(modrm)));
10935 breakup128to32s( t5, &t3, &t2, &t1, &t0 );
10936 switch (insn[3] & 7) {
10937 case 0: assign(t4, unop(Iop_32to16, mkexpr(t0))); break;
10938 case 1: assign(t4, unop(Iop_32HIto16, mkexpr(t0))); break;
10939 case 2: assign(t4, unop(Iop_32to16, mkexpr(t1))); break;
10940 case 3: assign(t4, unop(Iop_32HIto16, mkexpr(t1))); break;
10941 case 4: assign(t4, unop(Iop_32to16, mkexpr(t2))); break;
10942 case 5: assign(t4, unop(Iop_32HIto16, mkexpr(t2))); break;
10943 case 6: assign(t4, unop(Iop_32to16, mkexpr(t3))); break;
10944 case 7: assign(t4, unop(Iop_32HIto16, mkexpr(t3))); break;
10945 default: vassert(0); /*NOTREACHED*/
10946 }
10947 putIReg(4, gregOfRM(modrm), unop(Iop_16Uto32, mkexpr(t4)));
10948 DIP("pextrw $%d,%s,%s\n",
10949 (Int)insn[3], nameXMMReg(eregOfRM(modrm)),
10950 nameIReg(4,gregOfRM(modrm)));
10951 delta += 4;
10952 goto decode_success;
10953 }
10954 /* else fall through */
10955 }
10956
10957 /* 66 0F C4 = PINSRW -- get 16 bits from E(mem or low half ireg) and
10958 put it into the specified lane of xmm(G). */
10959 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xC4) {
10960 Int lane;
10961 t4 = newTemp(Ity_I16);
10962 modrm = insn[2];
10963
10964 if (epartIsReg(modrm)) {
10965 assign(t4, getIReg(2, eregOfRM(modrm)));
10966 delta += 3+1;
10967 lane = insn[3+1-1];
10968 DIP("pinsrw $%d,%s,%s\n", (Int)lane,
10969 nameIReg(2,eregOfRM(modrm)),
10970 nameXMMReg(gregOfRM(modrm)));
10971 } else {
10972 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
10973 delta += 3+alen;
10974 lane = insn[3+alen-1];
10975 assign(t4, loadLE(Ity_I16, mkexpr(addr)));
10976 DIP("pinsrw $%d,%s,%s\n", (Int)lane,
10977 dis_buf,
10978 nameXMMReg(gregOfRM(modrm)));
10979 }
10980
10981 putXMMRegLane16( gregOfRM(modrm), lane & 7, mkexpr(t4) );
10982 goto decode_success;
10983 }
10984
10985 /* 66 0F F5 = PMADDWD -- Multiply and add packed integers from
10986 E(xmm or mem) to G(xmm) */
10987 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xF5) {
10988 IRTemp s1V = newTemp(Ity_V128);
10989 IRTemp s2V = newTemp(Ity_V128);
10990 IRTemp dV = newTemp(Ity_V128);
10991 IRTemp s1Hi = newTemp(Ity_I64);
10992 IRTemp s1Lo = newTemp(Ity_I64);
10993 IRTemp s2Hi = newTemp(Ity_I64);
10994 IRTemp s2Lo = newTemp(Ity_I64);
10995 IRTemp dHi = newTemp(Ity_I64);
10996 IRTemp dLo = newTemp(Ity_I64);
10997 modrm = insn[2];
10998 if (epartIsReg(modrm)) {
10999 assign( s1V, getXMMReg(eregOfRM(modrm)) );
11000 delta += 2+1;
11001 DIP("pmaddwd %s,%s\n", nameXMMReg(eregOfRM(modrm)),
11002 nameXMMReg(gregOfRM(modrm)));
11003 } else {
11004 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
11005 assign( s1V, loadLE(Ity_V128, mkexpr(addr)) );
11006 delta += 2+alen;
11007 DIP("pmaddwd %s,%s\n", dis_buf,
11008 nameXMMReg(gregOfRM(modrm)));
11009 }
11010 assign( s2V, getXMMReg(gregOfRM(modrm)) );
11011 assign( s1Hi, unop(Iop_V128HIto64, mkexpr(s1V)) );
11012 assign( s1Lo, unop(Iop_V128to64, mkexpr(s1V)) );
11013 assign( s2Hi, unop(Iop_V128HIto64, mkexpr(s2V)) );
11014 assign( s2Lo, unop(Iop_V128to64, mkexpr(s2V)) );
11015 assign( dHi, mkIRExprCCall(
11016 Ity_I64, 0/*regparms*/,
11017 "x86g_calculate_mmx_pmaddwd",
11018 &x86g_calculate_mmx_pmaddwd,
11019 mkIRExprVec_2( mkexpr(s1Hi), mkexpr(s2Hi))
11020 ));
11021 assign( dLo, mkIRExprCCall(
11022 Ity_I64, 0/*regparms*/,
11023 "x86g_calculate_mmx_pmaddwd",
11024 &x86g_calculate_mmx_pmaddwd,
11025 mkIRExprVec_2( mkexpr(s1Lo), mkexpr(s2Lo))
11026 ));
11027 assign( dV, binop(Iop_64HLtoV128, mkexpr(dHi), mkexpr(dLo))) ;
11028 putXMMReg(gregOfRM(modrm), mkexpr(dV));
11029 goto decode_success;
11030 }
11031
11032 /* 66 0F EE = PMAXSW -- 16x8 signed max */
11033 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xEE) {
11034 delta = dis_SSEint_E_to_G( sorb, delta+2,
11035 "pmaxsw", Iop_Max16Sx8, False );
11036 goto decode_success;
11037 }
11038
11039 /* 66 0F DE = PMAXUB -- 8x16 unsigned max */
11040 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xDE) {
11041 delta = dis_SSEint_E_to_G( sorb, delta+2,
11042 "pmaxub", Iop_Max8Ux16, False );
11043 goto decode_success;
11044 }
11045
11046 /* 66 0F EA = PMINSW -- 16x8 signed min */
11047 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xEA) {
11048 delta = dis_SSEint_E_to_G( sorb, delta+2,
11049 "pminsw", Iop_Min16Sx8, False );
11050 goto decode_success;
11051 }
11052
11053 /* 66 0F DA = PMINUB -- 8x16 unsigned min */
11054 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xDA) {
11055 delta = dis_SSEint_E_to_G( sorb, delta+2,
11056 "pminub", Iop_Min8Ux16, False );
11057 goto decode_success;
11058 }
11059
11060 /* 66 0F D7 = PMOVMSKB -- extract sign bits from each of 16 lanes
11061 in xmm(E), turn them into a byte, and put zero-extend of it in
11062 ireg(G). */
11063 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xD7) {
11064 modrm = insn[2];
11065 if (epartIsReg(modrm)) {
11066 t0 = newTemp(Ity_I64);
11067 t1 = newTemp(Ity_I64);
11068 assign(t0, getXMMRegLane64(eregOfRM(modrm), 0));
11069 assign(t1, getXMMRegLane64(eregOfRM(modrm), 1));
11070 t5 = newTemp(Ity_I32);
11071 assign(t5,
11072 unop(Iop_16Uto32,
11073 binop(Iop_8HLto16,
11074 unop(Iop_GetMSBs8x8, mkexpr(t1)),
11075 unop(Iop_GetMSBs8x8, mkexpr(t0)))));
11076 putIReg(4, gregOfRM(modrm), mkexpr(t5));
11077 DIP("pmovmskb %s,%s\n", nameXMMReg(eregOfRM(modrm)),
11078 nameIReg(4,gregOfRM(modrm)));
11079 delta += 3;
11080 goto decode_success;
11081 }
11082 /* else fall through */
11083 }
11084
11085 /* 66 0F E4 = PMULHUW -- 16x8 hi-half of unsigned widening multiply */
11086 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xE4) {
11087 delta = dis_SSEint_E_to_G( sorb, delta+2,
11088 "pmulhuw", Iop_MulHi16Ux8, False );
11089 goto decode_success;
11090 }
11091
11092 /* 66 0F E5 = PMULHW -- 16x8 hi-half of signed widening multiply */
11093 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xE5) {
11094 delta = dis_SSEint_E_to_G( sorb, delta+2,
11095 "pmulhw", Iop_MulHi16Sx8, False );
11096 goto decode_success;
11097 }
11098
/* 66 0F D5 = PMULLW -- 16x8 multiply */
11100 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xD5) {
11101 delta = dis_SSEint_E_to_G( sorb, delta+2,
11102 "pmullw", Iop_Mul16x8, False );
11103 goto decode_success;
11104 }
11105
11106 /* ***--- this is an MMX class insn introduced in SSE2 ---*** */
11107 /* 0F F4 = PMULUDQ -- unsigned widening multiply of 32-lanes 0 x
11108 0 to form 64-bit result */
11109 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xF4) {
11110 IRTemp sV = newTemp(Ity_I64);
11111 IRTemp dV = newTemp(Ity_I64);
11112 t1 = newTemp(Ity_I32);
11113 t0 = newTemp(Ity_I32);
11114 modrm = insn[2];
11115
11116 do_MMX_preamble();
11117 assign( dV, getMMXReg(gregOfRM(modrm)) );
11118
11119 if (epartIsReg(modrm)) {
11120 assign( sV, getMMXReg(eregOfRM(modrm)) );
11121 delta += 2+1;
11122 DIP("pmuludq %s,%s\n", nameMMXReg(eregOfRM(modrm)),
11123 nameMMXReg(gregOfRM(modrm)));
11124 } else {
11125 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
11126 assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
11127 delta += 2+alen;
11128 DIP("pmuludq %s,%s\n", dis_buf,
11129 nameMMXReg(gregOfRM(modrm)));
11130 }
11131
11132 assign( t0, unop(Iop_64to32, mkexpr(dV)) );
11133 assign( t1, unop(Iop_64to32, mkexpr(sV)) );
11134 putMMXReg( gregOfRM(modrm),
11135 binop( Iop_MullU32, mkexpr(t0), mkexpr(t1) ) );
11136 goto decode_success;
11137 }
11138
   /* 66 0F F4 = PMULUDQ -- unsigned widening multiply of 32-lanes 0 x
      0 to form lower 64-bit half and lanes 2 x 2 to form upper 64-bit
      half */
   /* This is a really poor translation -- could be improved if
      performance critical */
   /* Lanes 1 and 3 of both operands are ignored: only the 0th and
      2nd 32-bit lanes feed the two widening multiplies. */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xF4) {
      IRTemp sV, dV;
      IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
      sV = newTemp(Ity_V128);
      dV = newTemp(Ity_V128);
      s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;
      t1 = newTemp(Ity_I64);
      t0 = newTemp(Ity_I64);
      modrm = insn[2];
      assign( dV, getXMMReg(gregOfRM(modrm)) );

      if (epartIsReg(modrm)) {
         assign( sV, getXMMReg(eregOfRM(modrm)) );
         delta += 2+1;     /* 0F F4 modrm (66 prefix already consumed) */
         DIP("pmuludq %s,%s\n", nameXMMReg(eregOfRM(modrm)),
                                nameXMMReg(gregOfRM(modrm)));
      } else {
         addr = disAMode ( &alen, sorb, delta+2, dis_buf );
         assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
         delta += 2+alen;
         DIP("pmuludq %s,%s\n", dis_buf,
                                nameXMMReg(gregOfRM(modrm)));
      }

      /* Split both 128-bit values into four 32-bit lanes each. */
      breakup128to32s( dV, &d3, &d2, &d1, &d0 );
      breakup128to32s( sV, &s3, &s2, &s1, &s0 );

      assign( t0, binop( Iop_MullU32, mkexpr(d0), mkexpr(s0)) );
      putXMMRegLane64( gregOfRM(modrm), 0, mkexpr(t0) );
      assign( t1, binop( Iop_MullU32, mkexpr(d2), mkexpr(s2)) );
      putXMMRegLane64( gregOfRM(modrm), 1, mkexpr(t1) );
      goto decode_success;
   }
11177
   /* 66 0F EB = POR -- bitwise OR, E(xmm or mem) into G(xmm).
      Delegates operand fetch/store entirely to the common helper. */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xEB) {
      delta = dis_SSE_E_to_G_all( sorb, delta+2, "por", Iop_OrV128 );
      goto decode_success;
   }
11183
   /* 66 0F F6 = PSADBW -- 2 x (8x8 -> 48 zeroes ++ u16) Sum Abs Diffs
      from E(xmm or mem) to G(xmm) */
   /* Implemented by splitting each 128-bit operand into two 64-bit
      halves and evaluating each half with the MMX psadbw helper via
      a clean C call; the two 64-bit results are then glued back
      together. */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xF6) {
      IRTemp s1V  = newTemp(Ity_V128);
      IRTemp s2V  = newTemp(Ity_V128);
      IRTemp dV   = newTemp(Ity_V128);
      IRTemp s1Hi = newTemp(Ity_I64);
      IRTemp s1Lo = newTemp(Ity_I64);
      IRTemp s2Hi = newTemp(Ity_I64);
      IRTemp s2Lo = newTemp(Ity_I64);
      IRTemp dHi  = newTemp(Ity_I64);
      IRTemp dLo  = newTemp(Ity_I64);
      modrm = insn[2];
      if (epartIsReg(modrm)) {
         assign( s1V, getXMMReg(eregOfRM(modrm)) );
         delta += 2+1;
         DIP("psadbw %s,%s\n", nameXMMReg(eregOfRM(modrm)),
                               nameXMMReg(gregOfRM(modrm)));
      } else {
         addr = disAMode ( &alen, sorb, delta+2, dis_buf );
         assign( s1V, loadLE(Ity_V128, mkexpr(addr)) );
         delta += 2+alen;
         DIP("psadbw %s,%s\n", dis_buf,
                               nameXMMReg(gregOfRM(modrm)));
      }
      assign( s2V, getXMMReg(gregOfRM(modrm)) );
      assign( s1Hi, unop(Iop_V128HIto64, mkexpr(s1V)) );
      assign( s1Lo, unop(Iop_V128to64,   mkexpr(s1V)) );
      assign( s2Hi, unop(Iop_V128HIto64, mkexpr(s2V)) );
      assign( s2Lo, unop(Iop_V128to64,   mkexpr(s2V)) );
      assign( dHi, mkIRExprCCall(
                      Ity_I64, 0/*regparms*/,
                      "x86g_calculate_mmx_psadbw", 
                      &x86g_calculate_mmx_psadbw,
                      mkIRExprVec_2( mkexpr(s1Hi), mkexpr(s2Hi))
                   ));
      assign( dLo, mkIRExprCCall(
                      Ity_I64, 0/*regparms*/,
                      "x86g_calculate_mmx_psadbw", 
                      &x86g_calculate_mmx_psadbw,
                      mkIRExprVec_2( mkexpr(s1Lo), mkexpr(s2Lo))
                   ));
      assign( dV, binop(Iop_64HLtoV128, mkexpr(dHi), mkexpr(dLo))) ;
      putXMMReg(gregOfRM(modrm), mkexpr(dV));
      goto decode_success;
   }
11230
   /* 66 0F 70 = PSHUFD -- rearrange 4x32 from E(xmm or mem) to G(xmm) */
   /* The trailing imm8 ('order') holds four 2-bit lane selectors,
      one per destination lane, lowest bits selecting dest lane 0. */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x70) {
      Int order;
      IRTemp sV, dV, s3, s2, s1, s0;
      s3 = s2 = s1 = s0 = IRTemp_INVALID;
      sV = newTemp(Ity_V128);
      dV = newTemp(Ity_V128);
      modrm = insn[2];
      if (epartIsReg(modrm)) {
         assign( sV, getXMMReg(eregOfRM(modrm)) );
         order = (Int)insn[3];
         delta += 2+2;     /* 0F 70 modrm imm8 */
         DIP("pshufd $%d,%s,%s\n", order, 
                                   nameXMMReg(eregOfRM(modrm)),
                                   nameXMMReg(gregOfRM(modrm)));
      } else {
         addr = disAMode ( &alen, sorb, delta+2, dis_buf );
         assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
         order = (Int)insn[2+alen];   /* imm8 follows the addressing bytes */
         delta += 3+alen;
         DIP("pshufd $%d,%s,%s\n", order, 
                                   dis_buf,
                                   nameXMMReg(gregOfRM(modrm)));
      }
      breakup128to32s( sV, &s3, &s2, &s1, &s0 );

      /* Map a 2-bit selector to the corresponding source lane temp. */
#     define SEL(n) \
                ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3)))
      assign(dV,
	     mk128from32s( SEL((order>>6)&3), SEL((order>>4)&3),
                           SEL((order>>2)&3), SEL((order>>0)&3) )
      );
      putXMMReg(gregOfRM(modrm), mkexpr(dV));
#     undef SEL
      goto decode_success;
   }
11267
   /* F3 0F 70 = PSHUFHW -- rearrange upper half 4x16 from E(xmm or
      mem) to G(xmm), and copy lower half */
   /* Shuffles only the upper 64 bits (four 16-bit lanes) per the
      imm8 selectors; the lower 64 bits pass through unchanged. */
   if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x70) {
      Int order;
      IRTemp sVhi, dVhi, sV, dV, s3, s2, s1, s0;
      s3 = s2 = s1 = s0 = IRTemp_INVALID;
      sV   = newTemp(Ity_V128);
      dV   = newTemp(Ity_V128);
      sVhi = newTemp(Ity_I64);
      dVhi = newTemp(Ity_I64);
      modrm = insn[3];
      if (epartIsReg(modrm)) {
         assign( sV, getXMMReg(eregOfRM(modrm)) );
         order = (Int)insn[4];
         delta += 4+1;     /* F3 0F 70 modrm imm8 */
         DIP("pshufhw $%d,%s,%s\n", order, 
                                    nameXMMReg(eregOfRM(modrm)),
                                    nameXMMReg(gregOfRM(modrm)));
      } else {
         addr = disAMode ( &alen, sorb, delta+3, dis_buf );
         assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
         order = (Int)insn[3+alen];
         delta += 4+alen;
         DIP("pshufhw $%d,%s,%s\n", order, 
                                    dis_buf,
                                    nameXMMReg(gregOfRM(modrm)));
      }
      assign( sVhi, unop(Iop_V128HIto64, mkexpr(sV)) );
      breakup64to16s( sVhi, &s3, &s2, &s1, &s0 );

      /* Map a 2-bit selector to the corresponding 16-bit lane temp. */
#     define SEL(n) \
                ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3)))
      assign(dVhi,
	     mk64from16s( SEL((order>>6)&3), SEL((order>>4)&3),
                          SEL((order>>2)&3), SEL((order>>0)&3) )
      );
      assign(dV, binop( Iop_64HLtoV128, 
                        mkexpr(dVhi),
                        unop(Iop_V128to64, mkexpr(sV))) );
      putXMMReg(gregOfRM(modrm), mkexpr(dV));
#     undef SEL
      goto decode_success;
   }
11311
   /* F2 0F 70 = PSHUFLW -- rearrange lower half 4x16 from E(xmm or
      mem) to G(xmm), and copy upper half */
   /* Mirror image of PSHUFHW: shuffles only the lower 64 bits; the
      upper 64 bits pass through unchanged. */
   if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x70) {
      Int order;
      IRTemp sVlo, dVlo, sV, dV, s3, s2, s1, s0;
      s3 = s2 = s1 = s0 = IRTemp_INVALID;
      sV   = newTemp(Ity_V128);
      dV   = newTemp(Ity_V128);
      sVlo = newTemp(Ity_I64);
      dVlo = newTemp(Ity_I64);
      modrm = insn[3];
      if (epartIsReg(modrm)) {
         assign( sV, getXMMReg(eregOfRM(modrm)) );
         order = (Int)insn[4];
         delta += 4+1;     /* F2 0F 70 modrm imm8 */
         DIP("pshuflw $%d,%s,%s\n", order, 
                                    nameXMMReg(eregOfRM(modrm)),
                                    nameXMMReg(gregOfRM(modrm)));
      } else {
         addr = disAMode ( &alen, sorb, delta+3, dis_buf );
         assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
         order = (Int)insn[3+alen];
         delta += 4+alen;
         DIP("pshuflw $%d,%s,%s\n", order, 
                                    dis_buf,
                                    nameXMMReg(gregOfRM(modrm)));
      }
      assign( sVlo, unop(Iop_V128to64, mkexpr(sV)) );
      breakup64to16s( sVlo, &s3, &s2, &s1, &s0 );

      /* Map a 2-bit selector to the corresponding 16-bit lane temp. */
#     define SEL(n) \
                ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3)))
      assign(dVlo,
	     mk64from16s( SEL((order>>6)&3), SEL((order>>4)&3),
                          SEL((order>>2)&3), SEL((order>>0)&3) )
      );
      assign(dV, binop( Iop_64HLtoV128,
                        unop(Iop_V128HIto64, mkexpr(sV)),
                        mkexpr(dVlo) ) );
      putXMMReg(gregOfRM(modrm), mkexpr(dV));
#     undef SEL
      goto decode_success;
   }
11355
   /* 66 0F 72 /6 ib = PSLLD by immediate */
   /* Note: for 0F 72 the reg field of the modrm selects the
      operation (/6 = shift left), not a register operand. */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x72
       && epartIsReg(insn[2])
       && gregOfRM(insn[2]) == 6) {
      delta = dis_SSE_shiftE_imm( delta+2, "pslld", Iop_ShlN32x4 );
      goto decode_success;
   }

   /* 66 0F F2 = PSLLD by E -- shift count taken from the low 64 bits
      of E(xmm or mem). */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xF2) {
      delta = dis_SSE_shiftG_byE( sorb, delta+2, "pslld", Iop_ShlN32x4 );
      goto decode_success;
   }
11369
   /* 66 0F 73 /7 ib = PSLLDQ by immediate */
   /* Whole-register left shift by 'imm' BYTES.  Done on two 64-bit
      halves: bits shifted out of the low half are OR'd into the high
      half.  The imm==0, ==8, >8 and <8 cases are handled separately
      because the 64-bit shift amounts differ structurally. */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x73
       && epartIsReg(insn[2])
       && gregOfRM(insn[2]) == 7) {
      IRTemp sV, dV, hi64, lo64, hi64r, lo64r;
      Int    imm = (Int)insn[3];
      Int    reg = eregOfRM(insn[2]);
      DIP("pslldq $%d,%s\n", imm, nameXMMReg(reg));
      vassert(imm >= 0 && imm <= 255);
      delta += 4;

      sV    = newTemp(Ity_V128);
      dV    = newTemp(Ity_V128);
      hi64  = newTemp(Ity_I64);
      lo64  = newTemp(Ity_I64);
      hi64r = newTemp(Ity_I64);
      lo64r = newTemp(Ity_I64);

      /* A shift of 16 bytes or more clears the whole register. */
      if (imm >= 16) {
         putXMMReg(reg, mkV128(0x0000));
         goto decode_success;
      }

      assign( sV, getXMMReg(reg) );
      assign( hi64, unop(Iop_V128HIto64, mkexpr(sV)) );
      assign( lo64, unop(Iop_V128to64, mkexpr(sV)) );

      if (imm == 0) {
         assign( lo64r, mkexpr(lo64) );
         assign( hi64r, mkexpr(hi64) );
      }
      else
      if (imm == 8) {
         /* Low half moves verbatim into the high half. */
         assign( lo64r, mkU64(0) );
         assign( hi64r, mkexpr(lo64) );
      }
      else 
      if (imm > 8) {
         /* High result comes only from the (shifted) low half. */
         assign( lo64r, mkU64(0) );
         assign( hi64r, binop( Iop_Shl64, 
                               mkexpr(lo64),
                               mkU8( 8*(imm-8) ) ));
      } else {
         /* imm in 1..7: shift both halves, carry low->high. */
         assign( lo64r, binop( Iop_Shl64, 
                               mkexpr(lo64),
                               mkU8(8 * imm) ));
         assign( hi64r, 
                 binop( Iop_Or64,
                        binop(Iop_Shl64, mkexpr(hi64), 
                                         mkU8(8 * imm)),
                        binop(Iop_Shr64, mkexpr(lo64),
                                         mkU8(8 * (8 - imm)) )
                      )
               );
      }
      assign( dV, binop(Iop_64HLtoV128, mkexpr(hi64r), mkexpr(lo64r)) );
      putXMMReg(reg, mkexpr(dV));
      goto decode_success;
   }
11429
   /* 66 0F 73 /6 ib = PSLLQ by immediate */
   /* In the following shift arms, 0F 71/72/73 encode the operation
      in the modrm reg field (/2, /4, /6), while the E-form opcodes
      (D1..D3, E1/E2, F1..F3) take the count from register/memory. */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x73
       && epartIsReg(insn[2])
       && gregOfRM(insn[2]) == 6) {
      delta = dis_SSE_shiftE_imm( delta+2, "psllq", Iop_ShlN64x2 );
      goto decode_success;
   }

   /* 66 0F F3 = PSLLQ by E */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xF3) {
      delta = dis_SSE_shiftG_byE( sorb, delta+2, "psllq", Iop_ShlN64x2 );
      goto decode_success;
   }

   /* 66 0F 71 /6 ib = PSLLW by immediate */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x71
       && epartIsReg(insn[2])
       && gregOfRM(insn[2]) == 6) {
      delta = dis_SSE_shiftE_imm( delta+2, "psllw", Iop_ShlN16x8 );
      goto decode_success;
   }

   /* 66 0F F1 = PSLLW by E */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xF1) {
      delta = dis_SSE_shiftG_byE( sorb, delta+2, "psllw", Iop_ShlN16x8 );
      goto decode_success;
   }

   /* 66 0F 72 /4 ib = PSRAD by immediate */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x72
       && epartIsReg(insn[2])
       && gregOfRM(insn[2]) == 4) {
      delta = dis_SSE_shiftE_imm( delta+2, "psrad", Iop_SarN32x4 );
      goto decode_success;
   }

   /* 66 0F E2 = PSRAD by E */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xE2) {
      delta = dis_SSE_shiftG_byE( sorb, delta+2, "psrad", Iop_SarN32x4 );
      goto decode_success;
   }

   /* 66 0F 71 /4 ib = PSRAW by immediate */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x71
       && epartIsReg(insn[2])
       && gregOfRM(insn[2]) == 4) {
      delta = dis_SSE_shiftE_imm( delta+2, "psraw", Iop_SarN16x8 );
      goto decode_success;
   }

   /* 66 0F E1 = PSRAW by E */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xE1) {
      delta = dis_SSE_shiftG_byE( sorb, delta+2, "psraw", Iop_SarN16x8 );
      goto decode_success;
   }

   /* 66 0F 72 /2 ib = PSRLD by immediate */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x72
       && epartIsReg(insn[2])
       && gregOfRM(insn[2]) == 2) {
      delta = dis_SSE_shiftE_imm( delta+2, "psrld", Iop_ShrN32x4 );
      goto decode_success;
   }

   /* 66 0F D2 = PSRLD by E */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xD2) {
      delta = dis_SSE_shiftG_byE( sorb, delta+2, "psrld", Iop_ShrN32x4 );
      goto decode_success;
   }
11499
   /* 66 0F 73 /3 ib = PSRLDQ by immediate */
   /* Whole-register right shift by 'imm' BYTES; structural mirror of
      the PSLLDQ case above, with bits carried high->low instead. */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x73
       && epartIsReg(insn[2])
       && gregOfRM(insn[2]) == 3) {
      IRTemp sV, dV, hi64, lo64, hi64r, lo64r;
      Int    imm = (Int)insn[3];
      Int    reg = eregOfRM(insn[2]);
      DIP("psrldq $%d,%s\n", imm, nameXMMReg(reg));
      vassert(imm >= 0 && imm <= 255);
      delta += 4;

      sV    = newTemp(Ity_V128);
      dV    = newTemp(Ity_V128);
      hi64  = newTemp(Ity_I64);
      lo64  = newTemp(Ity_I64);
      hi64r = newTemp(Ity_I64);
      lo64r = newTemp(Ity_I64);

      /* A shift of 16 bytes or more clears the whole register. */
      if (imm >= 16) {
         putXMMReg(reg, mkV128(0x0000));
         goto decode_success;
      }

      assign( sV, getXMMReg(reg) );
      assign( hi64, unop(Iop_V128HIto64, mkexpr(sV)) );
      assign( lo64, unop(Iop_V128to64, mkexpr(sV)) );

      if (imm == 0) {
         assign( lo64r, mkexpr(lo64) );
         assign( hi64r, mkexpr(hi64) );
      }
      else
      if (imm == 8) {
         /* High half moves verbatim into the low half. */
         assign( hi64r, mkU64(0) );
         assign( lo64r, mkexpr(hi64) );
      }
      else 
      if (imm > 8) {
         /* Low result comes only from the (shifted) high half. */
         assign( hi64r, mkU64(0) );
         assign( lo64r, binop( Iop_Shr64, 
                               mkexpr(hi64),
                               mkU8( 8*(imm-8) ) ));
      } else {
         /* imm in 1..7: shift both halves, carry high->low. */
         assign( hi64r, binop( Iop_Shr64, 
                               mkexpr(hi64),
                               mkU8(8 * imm) ));
         assign( lo64r, 
                 binop( Iop_Or64,
                        binop(Iop_Shr64, mkexpr(lo64), 
                                         mkU8(8 * imm)),
                        binop(Iop_Shl64, mkexpr(hi64),
                                         mkU8(8 * (8 - imm)) )
                      )
               );
      }

      assign( dV, binop(Iop_64HLtoV128, mkexpr(hi64r), mkexpr(lo64r)) );
      putXMMReg(reg, mkexpr(dV));
      goto decode_success;
   }
11560
   /* 66 0F 73 /2 ib = PSRLQ by immediate */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x73
       && epartIsReg(insn[2])
       && gregOfRM(insn[2]) == 2) {
      delta = dis_SSE_shiftE_imm( delta+2, "psrlq", Iop_ShrN64x2 );
      goto decode_success;
   }

   /* 66 0F D3 = PSRLQ by E */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xD3) {
      delta = dis_SSE_shiftG_byE( sorb, delta+2, "psrlq", Iop_ShrN64x2 );
      goto decode_success;
   }

   /* 66 0F 71 /2 ib = PSRLW by immediate */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x71
       && epartIsReg(insn[2])
       && gregOfRM(insn[2]) == 2) {
      delta = dis_SSE_shiftE_imm( delta+2, "psrlw", Iop_ShrN16x8 );
      goto decode_success;
   }

   /* 66 0F D1 = PSRLW by E */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xD1) {
      delta = dis_SSE_shiftG_byE( sorb, delta+2, "psrlw", Iop_ShrN16x8 );
      goto decode_success;
   }
11588
   /* 66 0F F8 = PSUBB */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xF8) {
      delta = dis_SSEint_E_to_G( sorb, delta+2, 
                                 "psubb", Iop_Sub8x16, False );
      goto decode_success;
   }

   /* 66 0F FA = PSUBD */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xFA) {
      delta = dis_SSEint_E_to_G( sorb, delta+2, 
                                 "psubd", Iop_Sub32x4, False );
      goto decode_success;
   }

   /* ***--- this is an MMX class insn introduced in SSE2 ---*** */
   /* 0F FB = PSUBQ -- sub 64x1 */
   if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xFB) {
      do_MMX_preamble();
      delta = dis_MMXop_regmem_to_reg ( 
                sorb, delta+2, insn[1], "psubq", False );
      goto decode_success;
   }

   /* 66 0F FB = PSUBQ */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xFB) {
      delta = dis_SSEint_E_to_G( sorb, delta+2, 
                                 "psubq", Iop_Sub64x2, False );
      goto decode_success;
   }

   /* 66 0F F9 = PSUBW */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xF9) {
      delta = dis_SSEint_E_to_G( sorb, delta+2, 
                                 "psubw", Iop_Sub16x8, False );
      goto decode_success;
   }

   /* 66 0F E8 = PSUBSB -- signed saturating subtract */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xE8) {
      delta = dis_SSEint_E_to_G( sorb, delta+2, 
                                 "psubsb", Iop_QSub8Sx16, False );
      goto decode_success;
   }

   /* 66 0F E9 = PSUBSW -- signed saturating subtract */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xE9) {
      delta = dis_SSEint_E_to_G( sorb, delta+2, 
                                 "psubsw", Iop_QSub16Sx8, False );
      goto decode_success;
   }

   /* 66 0F D8 = PSUBUSB -- unsigned saturating subtract
      (comment previously misread "PSUBSB"; the code decodes the
      unsigned form, as the mnemonic and Iop_QSub8Ux16 show). */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xD8) {
      delta = dis_SSEint_E_to_G( sorb, delta+2, 
                                 "psubusb", Iop_QSub8Ux16, False );
      goto decode_success;
   }

   /* 66 0F D9 = PSUBUSW -- unsigned saturating subtract
      (comment previously misread "PSUBSW"). */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xD9) {
      delta = dis_SSEint_E_to_G( sorb, delta+2, 
                                 "psubusw", Iop_QSub16Ux8, False );
      goto decode_success;
   }
11653
   /* 66 0F 68 = PUNPCKHBW */
   /* For all PUNPCK* arms the final 'True' argument to
      dis_SSEint_E_to_G swaps the operand order given to the IR
      interleave op. */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x68) {
      delta = dis_SSEint_E_to_G( sorb, delta+2, 
                                 "punpckhbw",
                                 Iop_InterleaveHI8x16, True );
      goto decode_success;
   }

   /* 66 0F 6A = PUNPCKHDQ */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x6A) {
      delta = dis_SSEint_E_to_G( sorb, delta+2, 
                                 "punpckhdq",
                                 Iop_InterleaveHI32x4, True );
      goto decode_success;
   }

   /* 66 0F 6D = PUNPCKHQDQ */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x6D) {
      delta = dis_SSEint_E_to_G( sorb, delta+2, 
                                 "punpckhqdq",
                                 Iop_InterleaveHI64x2, True );
      goto decode_success;
   }

   /* 66 0F 69 = PUNPCKHWD */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x69) {
      delta = dis_SSEint_E_to_G( sorb, delta+2, 
                                 "punpckhwd",
                                 Iop_InterleaveHI16x8, True );
      goto decode_success;
   }

   /* 66 0F 60 = PUNPCKLBW */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x60) {
      delta = dis_SSEint_E_to_G( sorb, delta+2, 
                                 "punpcklbw",
                                 Iop_InterleaveLO8x16, True );
      goto decode_success;
   }

   /* 66 0F 62 = PUNPCKLDQ */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x62) {
      delta = dis_SSEint_E_to_G( sorb, delta+2, 
                                 "punpckldq",
                                 Iop_InterleaveLO32x4, True );
      goto decode_success;
   }

   /* 66 0F 6C = PUNPCKLQDQ */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x6C) {
      delta = dis_SSEint_E_to_G( sorb, delta+2, 
                                 "punpcklqdq",
                                 Iop_InterleaveLO64x2, True );
      goto decode_success;
   }

   /* 66 0F 61 = PUNPCKLWD */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x61) {
      delta = dis_SSEint_E_to_G( sorb, delta+2, 
                                 "punpcklwd",
                                 Iop_InterleaveLO16x8, True );
      goto decode_success;
   }

   /* 66 0F EF = PXOR */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xEF) {
      delta = dis_SSE_E_to_G_all( sorb, delta+2, "pxor", Iop_XorV128 );
      goto decode_success;
   }
11723
11724 //-- /* FXSAVE/FXRSTOR m32 -- load/store the FPU/MMX/SSE state. */
11725 //-- if (insn[0] == 0x0F && insn[1] == 0xAE
11726 //-- && (!epartIsReg(insn[2]))
11727 //-- && (gregOfRM(insn[2]) == 1 || gregOfRM(insn[2]) == 0) ) {
11728 //-- Bool store = gregOfRM(insn[2]) == 0;
11729 //-- vg_assert(sz == 4);
11730 //-- pair = disAMode ( cb, sorb, eip+2, dis_buf );
11731 //-- t1 = LOW24(pair);
11732 //-- eip += 2+HI8(pair);
11733 //-- uInstr3(cb, store ? SSE2a_MemWr : SSE2a_MemRd, 512,
11734 //-- Lit16, (((UShort)insn[0]) << 8) | (UShort)insn[1],
11735 //-- Lit16, (UShort)insn[2],
11736 //-- TempReg, t1 );
11737 //-- DIP("fx%s %s\n", store ? "save" : "rstor", dis_buf );
11738 //-- goto decode_success;
11739 //-- }
11740
   /* 0F AE /7 = CLFLUSH -- flush cache line */
   /* Translated as a self-modifying-code notification: write the
      rounded-down address and a conservative length into the guest
      state (CMSTART/CMLEN) and exit the block with Ijk_InvalICache
      so cached translations covering that range get discarded. */
   if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xAE
       && !epartIsReg(insn[2]) && gregOfRM(insn[2]) == 7) {

      /* This is something of a hack.  We need to know the size of the
         cache line containing addr.  Since we don't (easily), assume
         256 on the basis that no real cache would have a line that
         big.  It's safe to invalidate more stuff than we need, just
         inefficient. */
      UInt lineszB = 256;

      addr = disAMode ( &alen, sorb, delta+2, dis_buf );
      delta += 2+alen;

      /* Round addr down to the start of the containing block. */
      stmt( IRStmt_Put(
               OFFB_CMSTART,
               binop( Iop_And32, 
                      mkexpr(addr), 
                      mkU32( ~(lineszB-1) ))) );

      stmt( IRStmt_Put(OFFB_CMLEN, mkU32(lineszB) ) );

      jmp_lit(&dres, Ijk_InvalICache, (Addr32)(guest_EIP_bbstart+delta));

      DIP("clflush %s\n", dis_buf);
      goto decode_success;
   }
11769
11770 /* ---------------------------------------------------- */
11771 /* --- end of the SSE2 decoder. --- */
11772 /* ---------------------------------------------------- */
11773
11774 /* ---------------------------------------------------- */
11775 /* --- start of the SSE3 decoder. --- */
11776 /* ---------------------------------------------------- */
11777
   /* Skip parts of the decoder which don't apply given the stated
      guest subarchitecture. */
   /* if (0 == (archinfo->hwcaps & VEX_HWCAPS_X86_SSE3)) */
   /* In fact this is highly bogus; we accept SSE3 insns even on a
      SSE2-only guest since they turn into IR which can be re-emitted
      successfully on an SSE2 host. */
   if (0 == (archinfo->hwcaps & VEX_HWCAPS_X86_SSE2))
      goto after_sse_decoders; /* no SSE3 capabilities */

   /* Re-fetch the insn pointer: delta may have advanced above. */
   insn = (UChar*)&guest_code[delta];
11788
   /* F3 0F 12 = MOVSLDUP -- move from E (mem or xmm) to G (xmm),
      duplicating some lanes (2:2:0:0). */
   /* F3 0F 16 = MOVSHDUP -- move from E (mem or xmm) to G (xmm),
      duplicating some lanes (3:3:1:1). */
   /* One arm handles both: 'isH' selects which pair of source lanes
      gets duplicated.  Memory operands must be 16-aligned. */
   if (sz == 4 && insn[0] == 0xF3 && insn[1] == 0x0F 
       && (insn[2] == 0x12 || insn[2] == 0x16)) {
      IRTemp s3, s2, s1, s0;
      IRTemp sV  = newTemp(Ity_V128);
      Bool   isH = insn[2] == 0x16;
      s3 = s2 = s1 = s0 = IRTemp_INVALID;

      modrm = insn[3];
      if (epartIsReg(modrm)) {
         assign( sV, getXMMReg( eregOfRM(modrm)) );
         DIP("movs%cdup %s,%s\n", isH ? 'h' : 'l',
                                  nameXMMReg(eregOfRM(modrm)),
                                  nameXMMReg(gregOfRM(modrm)));
         delta += 3+1;
      } else {
         addr = disAMode ( &alen, sorb, delta+3, dis_buf );
         gen_SEGV_if_not_16_aligned( addr );
         assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
         DIP("movs%cdup %s,%s\n", isH ? 'h' : 'l',
	     dis_buf,
             nameXMMReg(gregOfRM(modrm)));
         delta += 3+alen;
      }

      breakup128to32s( sV, &s3, &s2, &s1, &s0 );
      putXMMReg( gregOfRM(modrm), 
                 isH ? mk128from32s( s3, s3, s1, s1 )
                     : mk128from32s( s2, s2, s0, s0 ) );
      goto decode_success;
   }
11823
   /* F2 0F 12 = MOVDDUP -- move from E (mem or xmm) to G (xmm),
      duplicating some lanes (0:1:0:1). */
   /* The low 64 bits of the source (or the 64-bit memory operand;
      note only 8 bytes are loaded, no alignment check) are copied
      into both halves of the destination. */
   if (sz == 4 && insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x12) {
      IRTemp sV = newTemp(Ity_V128);
      IRTemp d0 = newTemp(Ity_I64);

      modrm = insn[3];
      if (epartIsReg(modrm)) {
         assign( sV, getXMMReg( eregOfRM(modrm)) );
         DIP("movddup %s,%s\n", nameXMMReg(eregOfRM(modrm)),
                                nameXMMReg(gregOfRM(modrm)));
         delta += 3+1;
         assign ( d0, unop(Iop_V128to64, mkexpr(sV)) );
      } else {
         addr = disAMode ( &alen, sorb, delta+3, dis_buf );
         assign( d0, loadLE(Ity_I64, mkexpr(addr)) );
         DIP("movddup %s,%s\n", dis_buf,
                                nameXMMReg(gregOfRM(modrm)));
         delta += 3+alen;
      }

      putXMMReg( gregOfRM(modrm), binop(Iop_64HLtoV128,mkexpr(d0),mkexpr(d0)) );
      goto decode_success;
   }
11848
   /* F2 0F D0 = ADDSUBPS -- 32x4 +/-/+/- from E (mem or xmm) to G (xmm). */
   /* Computes both the full vector sum and the full vector
      difference, then picks per-lane: add for odd lanes (3,1),
      subtract for even lanes (2,0). */
   if (sz == 4 && insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0xD0) {
      IRTemp a3, a2, a1, a0, s3, s2, s1, s0;
      IRTemp eV   = newTemp(Ity_V128);
      IRTemp gV   = newTemp(Ity_V128);
      IRTemp addV = newTemp(Ity_V128);
      IRTemp subV = newTemp(Ity_V128);
      IRTemp rm     = newTemp(Ity_I32);
      a3 = a2 = a1 = a0 = s3 = s2 = s1 = s0 = IRTemp_INVALID;

      modrm = insn[3];
      if (epartIsReg(modrm)) {
         assign( eV, getXMMReg( eregOfRM(modrm)) );
         DIP("addsubps %s,%s\n", nameXMMReg(eregOfRM(modrm)),
                                 nameXMMReg(gregOfRM(modrm)));
         delta += 3+1;
      } else {
         addr = disAMode ( &alen, sorb, delta+3, dis_buf );
         assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
         DIP("addsubps %s,%s\n", dis_buf,
                                 nameXMMReg(gregOfRM(modrm)));
         delta += 3+alen;
      }

      assign( gV, getXMMReg(gregOfRM(modrm)) );

      assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
      assign( addV, triop(Iop_Add32Fx4, mkexpr(rm), mkexpr(gV), mkexpr(eV)) );
      assign( subV, triop(Iop_Sub32Fx4, mkexpr(rm), mkexpr(gV), mkexpr(eV)) );

      breakup128to32s( addV, &a3, &a2, &a1, &a0 );
      breakup128to32s( subV, &s3, &s2, &s1, &s0 );

      putXMMReg( gregOfRM(modrm), mk128from32s( a3, s2, a1, s0 ));
      goto decode_success;
   }
11885
   /* 66 0F D0 = ADDSUBPD -- 64x4 +/- from E (mem or xmm) to G (xmm). */
   /* 64x2 variant of ADDSUBPS: the high lane takes the sum, the low
      lane takes the difference. */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xD0) {
      IRTemp eV   = newTemp(Ity_V128);
      IRTemp gV   = newTemp(Ity_V128);
      IRTemp addV = newTemp(Ity_V128);
      IRTemp subV = newTemp(Ity_V128);
      IRTemp a1     = newTemp(Ity_I64);
      IRTemp s0     = newTemp(Ity_I64);
      IRTemp rm     = newTemp(Ity_I32);

      modrm = insn[2];
      if (epartIsReg(modrm)) {
         assign( eV, getXMMReg( eregOfRM(modrm)) );
         DIP("addsubpd %s,%s\n", nameXMMReg(eregOfRM(modrm)),
                                 nameXMMReg(gregOfRM(modrm)));
         delta += 2+1;
      } else {
         addr = disAMode ( &alen, sorb, delta+2, dis_buf );
         assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
         DIP("addsubpd %s,%s\n", dis_buf,
                                 nameXMMReg(gregOfRM(modrm)));
         delta += 2+alen;
      }

      assign( gV, getXMMReg(gregOfRM(modrm)) );

      assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
      assign( addV, triop(Iop_Add64Fx2, mkexpr(rm), mkexpr(gV), mkexpr(eV)) );
      assign( subV, triop(Iop_Sub64Fx2, mkexpr(rm), mkexpr(gV), mkexpr(eV)) );

      assign( a1, unop(Iop_V128HIto64, mkexpr(addV) ));
      assign( s0, unop(Iop_V128to64,   mkexpr(subV) ));

      putXMMReg( gregOfRM(modrm), 
                 binop(Iop_64HLtoV128, mkexpr(a1), mkexpr(s0)) );
      goto decode_success;
   }
11923
   /* F2 0F 7D = HSUBPS -- 32x4 sub across from E (mem or xmm) to G (xmm). */
   /* F2 0F 7C = HADDPS -- 32x4 add across from E (mem or xmm) to G (xmm). */
   /* Horizontal op: gather the even lanes of both operands into one
      vector and the odd lanes into another, then do a single vertical
      add/sub between them. */
   if (sz == 4 && insn[0] == 0xF2 && insn[1] == 0x0F 
       && (insn[2] == 0x7C || insn[2] == 0x7D)) {
      IRTemp e3, e2, e1, e0, g3, g2, g1, g0;
      IRTemp eV     = newTemp(Ity_V128);
      IRTemp gV     = newTemp(Ity_V128);
      IRTemp leftV  = newTemp(Ity_V128);
      IRTemp rightV = newTemp(Ity_V128);
      IRTemp rm     = newTemp(Ity_I32);
      Bool   isAdd  = insn[2] == 0x7C;
      const HChar* str = isAdd ? "add" : "sub";
      e3 = e2 = e1 = e0 = g3 = g2 = g1 = g0 = IRTemp_INVALID;

      modrm = insn[3];
      if (epartIsReg(modrm)) {
         assign( eV, getXMMReg( eregOfRM(modrm)) );
         DIP("h%sps %s,%s\n", str, nameXMMReg(eregOfRM(modrm)),
                                   nameXMMReg(gregOfRM(modrm)));
         delta += 3+1;
      } else {
         addr = disAMode ( &alen, sorb, delta+3, dis_buf );
         assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
         DIP("h%sps %s,%s\n", str, dis_buf,
                                   nameXMMReg(gregOfRM(modrm)));
         delta += 3+alen;
      }

      assign( gV, getXMMReg(gregOfRM(modrm)) );

      breakup128to32s( eV, &e3, &e2, &e1, &e0 );
      breakup128to32s( gV, &g3, &g2, &g1, &g0 );

      assign( leftV,  mk128from32s( e2, e0, g2, g0 ) );
      assign( rightV, mk128from32s( e3, e1, g3, g1 ) );

      assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
      putXMMReg( gregOfRM(modrm), 
                 triop(isAdd ? Iop_Add32Fx4 : Iop_Sub32Fx4,
                       mkexpr(rm), mkexpr(leftV), mkexpr(rightV) ) );
      goto decode_success;
   }
11966
   /* 66 0F 7D = HSUBPD -- 64x2 sub across from E (mem or xmm) to G (xmm). */
   /* 66 0F 7C = HADDPD -- 64x2 add across from E (mem or xmm) to G (xmm). */
   /* 64x2 variant of HADDPS/HSUBPS: pair the low lanes and the high
      lanes of both operands, then one vertical add/sub. */
   if (sz == 2 && insn[0] == 0x0F && (insn[1] == 0x7C || insn[1] == 0x7D)) {
      IRTemp e1     = newTemp(Ity_I64);
      IRTemp e0     = newTemp(Ity_I64);
      IRTemp g1     = newTemp(Ity_I64);
      IRTemp g0     = newTemp(Ity_I64);
      IRTemp eV     = newTemp(Ity_V128);
      IRTemp gV     = newTemp(Ity_V128);
      IRTemp leftV  = newTemp(Ity_V128);
      IRTemp rightV = newTemp(Ity_V128);
      IRTemp rm     = newTemp(Ity_I32);
      Bool   isAdd  = insn[1] == 0x7C;
      const HChar* str = isAdd ? "add" : "sub";

      modrm = insn[2];
      if (epartIsReg(modrm)) {
         assign( eV, getXMMReg( eregOfRM(modrm)) );
         DIP("h%spd %s,%s\n", str, nameXMMReg(eregOfRM(modrm)),
                                   nameXMMReg(gregOfRM(modrm)));
         delta += 2+1;
      } else {
         addr = disAMode ( &alen, sorb, delta+2, dis_buf );
         assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
         DIP("h%spd %s,%s\n", str, dis_buf,
                              nameXMMReg(gregOfRM(modrm)));
         delta += 2+alen;
      }

      assign( gV, getXMMReg(gregOfRM(modrm)) );

      assign( e1, unop(Iop_V128HIto64, mkexpr(eV) ));
      assign( e0, unop(Iop_V128to64,   mkexpr(eV) ));
      assign( g1, unop(Iop_V128HIto64, mkexpr(gV) ));
      assign( g0, unop(Iop_V128to64,   mkexpr(gV) ));

      assign( leftV,  binop(Iop_64HLtoV128, mkexpr(e0),mkexpr(g0)) );
      assign( rightV, binop(Iop_64HLtoV128, mkexpr(e1),mkexpr(g1)) );

      assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
      putXMMReg( gregOfRM(modrm), 
                 triop(isAdd ? Iop_Add64Fx2 : Iop_Sub64Fx2,
                       mkexpr(rm), mkexpr(leftV), mkexpr(rightV) ) );
      goto decode_success;
   }
12012
   /* F2 0F F0 = LDDQU -- move from E (mem or xmm) to G (xmm). */
   /* Memory-only unaligned 128-bit load; a register E operand is an
      invalid encoding and falls through to decode_failure. */
   if (sz == 4 && insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0xF0) {
      modrm = getIByte(delta+3);
      if (epartIsReg(modrm)) {
         goto decode_failure;
      } else {
         addr = disAMode ( &alen, sorb, delta+3, dis_buf );
         putXMMReg( gregOfRM(modrm), 
                    loadLE(Ity_V128, mkexpr(addr)) );
         DIP("lddqu %s,%s\n", dis_buf,
                              nameXMMReg(gregOfRM(modrm)));
         delta += 3+alen;
      }
      goto decode_success;
   }
12028
12029 /* ---------------------------------------------------- */
12030 /* --- end of the SSE3 decoder. --- */
12031 /* ---------------------------------------------------- */
12032
12033 /* ---------------------------------------------------- */
12034 /* --- start of the SSSE3 decoder. --- */
12035 /* ---------------------------------------------------- */
12036
   /* 0F 38 04 = PMADDUBSW -- Multiply and Add Packed Signed and
      Unsigned Bytes (MMX) */
   /* Each byte pair is widened to 16 bits (dest bytes zero-extended,
      source bytes sign-extended) via shift tricks, multiplied, and
      the odd/even products combined with signed saturating add. */
   if (sz == 4
       && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x04) {
      IRTemp sV        = newTemp(Ity_I64);
      IRTemp dV        = newTemp(Ity_I64);
      IRTemp sVoddsSX  = newTemp(Ity_I64);
      IRTemp sVevensSX = newTemp(Ity_I64);
      IRTemp dVoddsZX  = newTemp(Ity_I64);
      IRTemp dVevensZX = newTemp(Ity_I64);

      modrm = insn[3];
      do_MMX_preamble();
      assign( dV, getMMXReg(gregOfRM(modrm)) );

      if (epartIsReg(modrm)) {
         assign( sV, getMMXReg(eregOfRM(modrm)) );
         delta += 3+1;
         DIP("pmaddubsw %s,%s\n", nameMMXReg(eregOfRM(modrm)),
                                  nameMMXReg(gregOfRM(modrm)));
      } else {
         addr = disAMode ( &alen, sorb, delta+3, dis_buf );
         assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
         delta += 3+alen;
         DIP("pmaddubsw %s,%s\n", dis_buf,
                                  nameMMXReg(gregOfRM(modrm)));
      }

      /* compute dV unsigned x sV signed */
      assign( sVoddsSX,
              binop(Iop_SarN16x4, mkexpr(sV), mkU8(8)) );
      assign( sVevensSX,
              binop(Iop_SarN16x4, 
                    binop(Iop_ShlN16x4, mkexpr(sV), mkU8(8)), 
                    mkU8(8)) );
      assign( dVoddsZX,
              binop(Iop_ShrN16x4, mkexpr(dV), mkU8(8)) );
      assign( dVevensZX,
              binop(Iop_ShrN16x4,
                    binop(Iop_ShlN16x4, mkexpr(dV), mkU8(8)),
                    mkU8(8)) );

      putMMXReg(
         gregOfRM(modrm),
         binop(Iop_QAdd16Sx4,
               binop(Iop_Mul16x4, mkexpr(sVoddsSX), mkexpr(dVoddsZX)),
               binop(Iop_Mul16x4, mkexpr(sVevensSX), mkexpr(dVevensZX))
         )
      );
      goto decode_success;
   }
12088
   /* 66 0F 38 04 = PMADDUBSW -- Multiply and Add Packed Signed and
      Unsigned Bytes (XMM) */
   /* XMM version of the arm above: same widen/multiply/saturating-add
      scheme on 16 byte lanes.  Memory operands must be 16-aligned. */
   if (sz == 2
       && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x04) {
      IRTemp sV        = newTemp(Ity_V128);
      IRTemp dV        = newTemp(Ity_V128);
      IRTemp sVoddsSX  = newTemp(Ity_V128);
      IRTemp sVevensSX = newTemp(Ity_V128);
      IRTemp dVoddsZX  = newTemp(Ity_V128);
      IRTemp dVevensZX = newTemp(Ity_V128);

      modrm = insn[3];
      assign( dV, getXMMReg(gregOfRM(modrm)) );

      if (epartIsReg(modrm)) {
         assign( sV, getXMMReg(eregOfRM(modrm)) );
         delta += 3+1;
         DIP("pmaddubsw %s,%s\n", nameXMMReg(eregOfRM(modrm)),
                                  nameXMMReg(gregOfRM(modrm)));
      } else {
         addr = disAMode ( &alen, sorb, delta+3, dis_buf );
         gen_SEGV_if_not_16_aligned( addr );
         assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
         delta += 3+alen;
         DIP("pmaddubsw %s,%s\n", dis_buf,
                                  nameXMMReg(gregOfRM(modrm)));
      }

      /* compute dV unsigned x sV signed */
      assign( sVoddsSX,
              binop(Iop_SarN16x8, mkexpr(sV), mkU8(8)) );
      assign( sVevensSX,
              binop(Iop_SarN16x8, 
                    binop(Iop_ShlN16x8, mkexpr(sV), mkU8(8)), 
                    mkU8(8)) );
      assign( dVoddsZX,
              binop(Iop_ShrN16x8, mkexpr(dV), mkU8(8)) );
      assign( dVevensZX,
              binop(Iop_ShrN16x8,
                    binop(Iop_ShlN16x8, mkexpr(dV), mkU8(8)),
                    mkU8(8)) );

      putXMMReg(
         gregOfRM(modrm),
         binop(Iop_QAdd16Sx8,
               binop(Iop_Mul16x8, mkexpr(sVoddsSX), mkexpr(dVoddsZX)),
               binop(Iop_Mul16x8, mkexpr(sVevensSX), mkexpr(dVevensZX))
         )
      );
      goto decode_success;
   }
12140
12141 /* ***--- these are MMX class insns introduced in SSSE3 ---*** */
12142 /* 0F 38 03 = PHADDSW -- 16x4 signed qadd across from E (mem or
12143 mmx) and G to G (mmx). */
12144 /* 0F 38 07 = PHSUBSW -- 16x4 signed qsub across from E (mem or
12145 mmx) and G to G (mmx). */
12146 /* 0F 38 01 = PHADDW -- 16x4 add across from E (mem or mmx) and G
12147 to G (mmx). */
12148 /* 0F 38 05 = PHSUBW -- 16x4 sub across from E (mem or mmx) and G
12149 to G (mmx). */
12150 /* 0F 38 02 = PHADDD -- 32x2 add across from E (mem or mmx) and G
12151 to G (mmx). */
12152 /* 0F 38 06 = PHSUBD -- 32x2 sub across from E (mem or mmx) and G
12153 to G (mmx). */
12154
12155 if (sz == 4
12156 && insn[0] == 0x0F && insn[1] == 0x38
12157 && (insn[2] == 0x03 || insn[2] == 0x07 || insn[2] == 0x01
12158 || insn[2] == 0x05 || insn[2] == 0x02 || insn[2] == 0x06)) {
12159 const HChar* str = "???";
12160 IROp opV64 = Iop_INVALID;
12161 IROp opCatO = Iop_CatOddLanes16x4;
12162 IROp opCatE = Iop_CatEvenLanes16x4;
12163 IRTemp sV = newTemp(Ity_I64);
12164 IRTemp dV = newTemp(Ity_I64);
12165
12166 modrm = insn[3];
12167
12168 switch (insn[2]) {
12169 case 0x03: opV64 = Iop_QAdd16Sx4; str = "addsw"; break;
12170 case 0x07: opV64 = Iop_QSub16Sx4; str = "subsw"; break;
12171 case 0x01: opV64 = Iop_Add16x4; str = "addw"; break;
12172 case 0x05: opV64 = Iop_Sub16x4; str = "subw"; break;
12173 case 0x02: opV64 = Iop_Add32x2; str = "addd"; break;
12174 case 0x06: opV64 = Iop_Sub32x2; str = "subd"; break;
12175 default: vassert(0);
12176 }
12177 if (insn[2] == 0x02 || insn[2] == 0x06) {
12178 opCatO = Iop_InterleaveHI32x2;
12179 opCatE = Iop_InterleaveLO32x2;
12180 }
12181
12182 do_MMX_preamble();
12183 assign( dV, getMMXReg(gregOfRM(modrm)) );
12184
12185 if (epartIsReg(modrm)) {
12186 assign( sV, getMMXReg(eregOfRM(modrm)) );
12187 delta += 3+1;
12188 DIP("ph%s %s,%s\n", str, nameMMXReg(eregOfRM(modrm)),
12189 nameMMXReg(gregOfRM(modrm)));
12190 } else {
12191 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
12192 assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
12193 delta += 3+alen;
12194 DIP("ph%s %s,%s\n", str, dis_buf,
12195 nameMMXReg(gregOfRM(modrm)));
12196 }
12197
12198 putMMXReg(
12199 gregOfRM(modrm),
12200 binop(opV64,
12201 binop(opCatE,mkexpr(sV),mkexpr(dV)),
12202 binop(opCatO,mkexpr(sV),mkexpr(dV))
12203 )
12204 );
12205 goto decode_success;
12206 }
12207
12208 /* 66 0F 38 03 = PHADDSW -- 16x8 signed qadd across from E (mem or
12209 xmm) and G to G (xmm). */
12210 /* 66 0F 38 07 = PHSUBSW -- 16x8 signed qsub across from E (mem or
12211 xmm) and G to G (xmm). */
12212 /* 66 0F 38 01 = PHADDW -- 16x8 add across from E (mem or xmm) and
12213 G to G (xmm). */
12214 /* 66 0F 38 05 = PHSUBW -- 16x8 sub across from E (mem or xmm) and
12215 G to G (xmm). */
12216 /* 66 0F 38 02 = PHADDD -- 32x4 add across from E (mem or xmm) and
12217 G to G (xmm). */
12218 /* 66 0F 38 06 = PHSUBD -- 32x4 sub across from E (mem or xmm) and
12219 G to G (xmm). */
12220
12221 if (sz == 2
12222 && insn[0] == 0x0F && insn[1] == 0x38
12223 && (insn[2] == 0x03 || insn[2] == 0x07 || insn[2] == 0x01
12224 || insn[2] == 0x05 || insn[2] == 0x02 || insn[2] == 0x06)) {
12225 const HChar* str = "???";
12226 IROp opV64 = Iop_INVALID;
12227 IROp opCatO = Iop_CatOddLanes16x4;
12228 IROp opCatE = Iop_CatEvenLanes16x4;
12229 IRTemp sV = newTemp(Ity_V128);
12230 IRTemp dV = newTemp(Ity_V128);
12231 IRTemp sHi = newTemp(Ity_I64);
12232 IRTemp sLo = newTemp(Ity_I64);
12233 IRTemp dHi = newTemp(Ity_I64);
12234 IRTemp dLo = newTemp(Ity_I64);
12235
12236 modrm = insn[3];
12237
12238 switch (insn[2]) {
12239 case 0x03: opV64 = Iop_QAdd16Sx4; str = "addsw"; break;
12240 case 0x07: opV64 = Iop_QSub16Sx4; str = "subsw"; break;
12241 case 0x01: opV64 = Iop_Add16x4; str = "addw"; break;
12242 case 0x05: opV64 = Iop_Sub16x4; str = "subw"; break;
12243 case 0x02: opV64 = Iop_Add32x2; str = "addd"; break;
12244 case 0x06: opV64 = Iop_Sub32x2; str = "subd"; break;
12245 default: vassert(0);
12246 }
12247 if (insn[2] == 0x02 || insn[2] == 0x06) {
12248 opCatO = Iop_InterleaveHI32x2;
12249 opCatE = Iop_InterleaveLO32x2;
12250 }
12251
12252 assign( dV, getXMMReg(gregOfRM(modrm)) );
12253
12254 if (epartIsReg(modrm)) {
12255 assign( sV, getXMMReg( eregOfRM(modrm)) );
12256 DIP("ph%s %s,%s\n", str, nameXMMReg(eregOfRM(modrm)),
12257 nameXMMReg(gregOfRM(modrm)));
12258 delta += 3+1;
12259 } else {
12260 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
12261 gen_SEGV_if_not_16_aligned( addr );
12262 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
12263 DIP("ph%s %s,%s\n", str, dis_buf,
12264 nameXMMReg(gregOfRM(modrm)));
12265 delta += 3+alen;
12266 }
12267
12268 assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) );
12269 assign( dLo, unop(Iop_V128to64, mkexpr(dV)) );
12270 assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
12271 assign( sLo, unop(Iop_V128to64, mkexpr(sV)) );
12272
12273       /* This isn't a particularly efficient way to compute the
12274          result, but at least it avoids a proliferation of IROps,
12275          hence avoids complicating all the backends. */
12276 putXMMReg(
12277 gregOfRM(modrm),
12278 binop(Iop_64HLtoV128,
12279 binop(opV64,
12280 binop(opCatE,mkexpr(sHi),mkexpr(sLo)),
12281 binop(opCatO,mkexpr(sHi),mkexpr(sLo))
12282 ),
12283 binop(opV64,
12284 binop(opCatE,mkexpr(dHi),mkexpr(dLo)),
12285 binop(opCatO,mkexpr(dHi),mkexpr(dLo))
12286 )
12287 )
12288 );
12289 goto decode_success;
12290 }
12291
12292 /* 0F 38 0B = PMULHRSW -- Packed Multiply High with Round and Scale
12293 (MMX) */
12294 if (sz == 4
12295 && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x0B) {
12296 IRTemp sV = newTemp(Ity_I64);
12297 IRTemp dV = newTemp(Ity_I64);
12298
12299 modrm = insn[3];
12300 do_MMX_preamble();
12301 assign( dV, getMMXReg(gregOfRM(modrm)) );
12302
12303 if (epartIsReg(modrm)) {
12304 assign( sV, getMMXReg(eregOfRM(modrm)) );
12305 delta += 3+1;
12306 DIP("pmulhrsw %s,%s\n", nameMMXReg(eregOfRM(modrm)),
12307 nameMMXReg(gregOfRM(modrm)));
12308 } else {
12309 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
12310 assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
12311 delta += 3+alen;
12312 DIP("pmulhrsw %s,%s\n", dis_buf,
12313 nameMMXReg(gregOfRM(modrm)));
12314 }
12315
12316 putMMXReg(
12317 gregOfRM(modrm),
12318 dis_PMULHRSW_helper( mkexpr(sV), mkexpr(dV) )
12319 );
12320 goto decode_success;
12321 }
12322
12323 /* 66 0F 38 0B = PMULHRSW -- Packed Multiply High with Round and
12324 Scale (XMM) */
12325 if (sz == 2
12326 && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x0B) {
12327 IRTemp sV = newTemp(Ity_V128);
12328 IRTemp dV = newTemp(Ity_V128);
12329 IRTemp sHi = newTemp(Ity_I64);
12330 IRTemp sLo = newTemp(Ity_I64);
12331 IRTemp dHi = newTemp(Ity_I64);
12332 IRTemp dLo = newTemp(Ity_I64);
12333
12334 modrm = insn[3];
12335 assign( dV, getXMMReg(gregOfRM(modrm)) );
12336
12337 if (epartIsReg(modrm)) {
12338 assign( sV, getXMMReg(eregOfRM(modrm)) );
12339 delta += 3+1;
12340 DIP("pmulhrsw %s,%s\n", nameXMMReg(eregOfRM(modrm)),
12341 nameXMMReg(gregOfRM(modrm)));
12342 } else {
12343 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
12344 gen_SEGV_if_not_16_aligned( addr );
12345 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
12346 delta += 3+alen;
12347 DIP("pmulhrsw %s,%s\n", dis_buf,
12348 nameXMMReg(gregOfRM(modrm)));
12349 }
12350
12351 assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) );
12352 assign( dLo, unop(Iop_V128to64, mkexpr(dV)) );
12353 assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
12354 assign( sLo, unop(Iop_V128to64, mkexpr(sV)) );
12355
12356 putXMMReg(
12357 gregOfRM(modrm),
12358 binop(Iop_64HLtoV128,
12359 dis_PMULHRSW_helper( mkexpr(sHi), mkexpr(dHi) ),
12360 dis_PMULHRSW_helper( mkexpr(sLo), mkexpr(dLo) )
12361 )
12362 );
12363 goto decode_success;
12364 }
12365
12366 /* 0F 38 08 = PSIGNB -- Packed Sign 8x8 (MMX) */
12367 /* 0F 38 09 = PSIGNW -- Packed Sign 16x4 (MMX) */
12368    /* 0F 38 0A = PSIGND -- Packed Sign 32x2 (MMX) */
12369 if (sz == 4
12370 && insn[0] == 0x0F && insn[1] == 0x38
12371 && (insn[2] == 0x08 || insn[2] == 0x09 || insn[2] == 0x0A)) {
12372 IRTemp sV = newTemp(Ity_I64);
12373 IRTemp dV = newTemp(Ity_I64);
12374 const HChar* str = "???";
12375 Int laneszB = 0;
12376
12377 switch (insn[2]) {
12378 case 0x08: laneszB = 1; str = "b"; break;
12379 case 0x09: laneszB = 2; str = "w"; break;
12380 case 0x0A: laneszB = 4; str = "d"; break;
12381 default: vassert(0);
12382 }
12383
12384 modrm = insn[3];
12385 do_MMX_preamble();
12386 assign( dV, getMMXReg(gregOfRM(modrm)) );
12387
12388 if (epartIsReg(modrm)) {
12389 assign( sV, getMMXReg(eregOfRM(modrm)) );
12390 delta += 3+1;
12391 DIP("psign%s %s,%s\n", str, nameMMXReg(eregOfRM(modrm)),
12392 nameMMXReg(gregOfRM(modrm)));
12393 } else {
12394 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
12395 assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
12396 delta += 3+alen;
12397 DIP("psign%s %s,%s\n", str, dis_buf,
12398 nameMMXReg(gregOfRM(modrm)));
12399 }
12400
12401 putMMXReg(
12402 gregOfRM(modrm),
12403 dis_PSIGN_helper( mkexpr(sV), mkexpr(dV), laneszB )
12404 );
12405 goto decode_success;
12406 }
12407
12408 /* 66 0F 38 08 = PSIGNB -- Packed Sign 8x16 (XMM) */
12409 /* 66 0F 38 09 = PSIGNW -- Packed Sign 16x8 (XMM) */
12410    /* 66 0F 38 0A = PSIGND -- Packed Sign 32x4 (XMM) */
12411 if (sz == 2
12412 && insn[0] == 0x0F && insn[1] == 0x38
12413 && (insn[2] == 0x08 || insn[2] == 0x09 || insn[2] == 0x0A)) {
12414 IRTemp sV = newTemp(Ity_V128);
12415 IRTemp dV = newTemp(Ity_V128);
12416 IRTemp sHi = newTemp(Ity_I64);
12417 IRTemp sLo = newTemp(Ity_I64);
12418 IRTemp dHi = newTemp(Ity_I64);
12419 IRTemp dLo = newTemp(Ity_I64);
12420 const HChar* str = "???";
12421 Int laneszB = 0;
12422
12423 switch (insn[2]) {
12424 case 0x08: laneszB = 1; str = "b"; break;
12425 case 0x09: laneszB = 2; str = "w"; break;
12426 case 0x0A: laneszB = 4; str = "d"; break;
12427 default: vassert(0);
12428 }
12429
12430 modrm = insn[3];
12431 assign( dV, getXMMReg(gregOfRM(modrm)) );
12432
12433 if (epartIsReg(modrm)) {
12434 assign( sV, getXMMReg(eregOfRM(modrm)) );
12435 delta += 3+1;
12436 DIP("psign%s %s,%s\n", str, nameXMMReg(eregOfRM(modrm)),
12437 nameXMMReg(gregOfRM(modrm)));
12438 } else {
12439 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
12440 gen_SEGV_if_not_16_aligned( addr );
12441 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
12442 delta += 3+alen;
12443 DIP("psign%s %s,%s\n", str, dis_buf,
12444 nameXMMReg(gregOfRM(modrm)));
12445 }
12446
12447 assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) );
12448 assign( dLo, unop(Iop_V128to64, mkexpr(dV)) );
12449 assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
12450 assign( sLo, unop(Iop_V128to64, mkexpr(sV)) );
12451
12452 putXMMReg(
12453 gregOfRM(modrm),
12454 binop(Iop_64HLtoV128,
12455 dis_PSIGN_helper( mkexpr(sHi), mkexpr(dHi), laneszB ),
12456 dis_PSIGN_helper( mkexpr(sLo), mkexpr(dLo), laneszB )
12457 )
12458 );
12459 goto decode_success;
12460 }
12461
12462 /* 0F 38 1C = PABSB -- Packed Absolute Value 8x8 (MMX) */
12463 /* 0F 38 1D = PABSW -- Packed Absolute Value 16x4 (MMX) */
12464 /* 0F 38 1E = PABSD -- Packed Absolute Value 32x2 (MMX) */
12465 if (sz == 4
12466 && insn[0] == 0x0F && insn[1] == 0x38
12467 && (insn[2] == 0x1C || insn[2] == 0x1D || insn[2] == 0x1E)) {
12468 IRTemp sV = newTemp(Ity_I64);
12469 const HChar* str = "???";
12470 Int laneszB = 0;
12471
12472 switch (insn[2]) {
12473 case 0x1C: laneszB = 1; str = "b"; break;
12474 case 0x1D: laneszB = 2; str = "w"; break;
12475 case 0x1E: laneszB = 4; str = "d"; break;
12476 default: vassert(0);
12477 }
12478
12479 modrm = insn[3];
12480 do_MMX_preamble();
12481
12482 if (epartIsReg(modrm)) {
12483 assign( sV, getMMXReg(eregOfRM(modrm)) );
12484 delta += 3+1;
12485 DIP("pabs%s %s,%s\n", str, nameMMXReg(eregOfRM(modrm)),
12486 nameMMXReg(gregOfRM(modrm)));
12487 } else {
12488 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
12489 assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
12490 delta += 3+alen;
12491 DIP("pabs%s %s,%s\n", str, dis_buf,
12492 nameMMXReg(gregOfRM(modrm)));
12493 }
12494
12495 putMMXReg(
12496 gregOfRM(modrm),
12497 dis_PABS_helper( mkexpr(sV), laneszB )
12498 );
12499 goto decode_success;
12500 }
12501
12502 /* 66 0F 38 1C = PABSB -- Packed Absolute Value 8x16 (XMM) */
12503 /* 66 0F 38 1D = PABSW -- Packed Absolute Value 16x8 (XMM) */
12504 /* 66 0F 38 1E = PABSD -- Packed Absolute Value 32x4 (XMM) */
12505 if (sz == 2
12506 && insn[0] == 0x0F && insn[1] == 0x38
12507 && (insn[2] == 0x1C || insn[2] == 0x1D || insn[2] == 0x1E)) {
12508 IRTemp sV = newTemp(Ity_V128);
12509 IRTemp sHi = newTemp(Ity_I64);
12510 IRTemp sLo = newTemp(Ity_I64);
12511 const HChar* str = "???";
12512 Int laneszB = 0;
12513
12514 switch (insn[2]) {
12515 case 0x1C: laneszB = 1; str = "b"; break;
12516 case 0x1D: laneszB = 2; str = "w"; break;
12517 case 0x1E: laneszB = 4; str = "d"; break;
12518 default: vassert(0);
12519 }
12520
12521 modrm = insn[3];
12522
12523 if (epartIsReg(modrm)) {
12524 assign( sV, getXMMReg(eregOfRM(modrm)) );
12525 delta += 3+1;
12526 DIP("pabs%s %s,%s\n", str, nameXMMReg(eregOfRM(modrm)),
12527 nameXMMReg(gregOfRM(modrm)));
12528 } else {
12529 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
12530 gen_SEGV_if_not_16_aligned( addr );
12531 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
12532 delta += 3+alen;
12533 DIP("pabs%s %s,%s\n", str, dis_buf,
12534 nameXMMReg(gregOfRM(modrm)));
12535 }
12536
12537 assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
12538 assign( sLo, unop(Iop_V128to64, mkexpr(sV)) );
12539
12540 putXMMReg(
12541 gregOfRM(modrm),
12542 binop(Iop_64HLtoV128,
12543 dis_PABS_helper( mkexpr(sHi), laneszB ),
12544 dis_PABS_helper( mkexpr(sLo), laneszB )
12545 )
12546 );
12547 goto decode_success;
12548 }
12549
12550 /* 0F 3A 0F = PALIGNR -- Packed Align Right (MMX) */
12551 if (sz == 4
12552 && insn[0] == 0x0F && insn[1] == 0x3A && insn[2] == 0x0F) {
12553 IRTemp sV = newTemp(Ity_I64);
12554 IRTemp dV = newTemp(Ity_I64);
12555 IRTemp res = newTemp(Ity_I64);
12556
12557 modrm = insn[3];
12558 do_MMX_preamble();
12559 assign( dV, getMMXReg(gregOfRM(modrm)) );
12560
12561 if (epartIsReg(modrm)) {
12562 assign( sV, getMMXReg(eregOfRM(modrm)) );
12563 d32 = (UInt)insn[3+1];
12564 delta += 3+1+1;
12565 DIP("palignr $%d,%s,%s\n", (Int)d32,
12566 nameMMXReg(eregOfRM(modrm)),
12567 nameMMXReg(gregOfRM(modrm)));
12568 } else {
12569 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
12570 assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
12571 d32 = (UInt)insn[3+alen];
12572 delta += 3+alen+1;
12573 DIP("palignr $%d%s,%s\n", (Int)d32,
12574 dis_buf,
12575 nameMMXReg(gregOfRM(modrm)));
12576 }
12577
12578 if (d32 == 0) {
12579 assign( res, mkexpr(sV) );
12580 }
12581 else if (d32 >= 1 && d32 <= 7) {
12582 assign(res,
12583 binop(Iop_Or64,
12584 binop(Iop_Shr64, mkexpr(sV), mkU8(8*d32)),
12585 binop(Iop_Shl64, mkexpr(dV), mkU8(8*(8-d32))
12586 )));
12587 }
12588 else if (d32 == 8) {
12589 assign( res, mkexpr(dV) );
12590 }
12591 else if (d32 >= 9 && d32 <= 15) {
12592 assign( res, binop(Iop_Shr64, mkexpr(dV), mkU8(8*(d32-8))) );
12593 }
12594 else if (d32 >= 16 && d32 <= 255) {
12595 assign( res, mkU64(0) );
12596 }
12597 else
12598 vassert(0);
12599
12600 putMMXReg( gregOfRM(modrm), mkexpr(res) );
12601 goto decode_success;
12602 }
12603
12604 /* 66 0F 3A 0F = PALIGNR -- Packed Align Right (XMM) */
12605 if (sz == 2
12606 && insn[0] == 0x0F && insn[1] == 0x3A && insn[2] == 0x0F) {
12607 IRTemp sV = newTemp(Ity_V128);
12608 IRTemp dV = newTemp(Ity_V128);
12609 IRTemp sHi = newTemp(Ity_I64);
12610 IRTemp sLo = newTemp(Ity_I64);
12611 IRTemp dHi = newTemp(Ity_I64);
12612 IRTemp dLo = newTemp(Ity_I64);
12613 IRTemp rHi = newTemp(Ity_I64);
12614 IRTemp rLo = newTemp(Ity_I64);
12615
12616 modrm = insn[3];
12617 assign( dV, getXMMReg(gregOfRM(modrm)) );
12618
12619 if (epartIsReg(modrm)) {
12620 assign( sV, getXMMReg(eregOfRM(modrm)) );
12621 d32 = (UInt)insn[3+1];
12622 delta += 3+1+1;
12623 DIP("palignr $%d,%s,%s\n", (Int)d32,
12624 nameXMMReg(eregOfRM(modrm)),
12625 nameXMMReg(gregOfRM(modrm)));
12626 } else {
12627 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
12628 gen_SEGV_if_not_16_aligned( addr );
12629 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
12630 d32 = (UInt)insn[3+alen];
12631 delta += 3+alen+1;
12632 DIP("palignr $%d,%s,%s\n", (Int)d32,
12633 dis_buf,
12634 nameXMMReg(gregOfRM(modrm)));
12635 }
12636
12637 assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) );
12638 assign( dLo, unop(Iop_V128to64, mkexpr(dV)) );
12639 assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
12640 assign( sLo, unop(Iop_V128to64, mkexpr(sV)) );
12641
12642 if (d32 == 0) {
12643 assign( rHi, mkexpr(sHi) );
12644 assign( rLo, mkexpr(sLo) );
12645 }
12646 else if (d32 >= 1 && d32 <= 7) {
12647 assign( rHi, dis_PALIGNR_XMM_helper(dLo, sHi, d32) );
12648 assign( rLo, dis_PALIGNR_XMM_helper(sHi, sLo, d32) );
12649 }
12650 else if (d32 == 8) {
12651 assign( rHi, mkexpr(dLo) );
12652 assign( rLo, mkexpr(sHi) );
12653 }
12654 else if (d32 >= 9 && d32 <= 15) {
12655 assign( rHi, dis_PALIGNR_XMM_helper(dHi, dLo, d32-8) );
12656 assign( rLo, dis_PALIGNR_XMM_helper(dLo, sHi, d32-8) );
12657 }
12658 else if (d32 == 16) {
12659 assign( rHi, mkexpr(dHi) );
12660 assign( rLo, mkexpr(dLo) );
12661 }
12662 else if (d32 >= 17 && d32 <= 23) {
12663 assign( rHi, binop(Iop_Shr64, mkexpr(dHi), mkU8(8*(d32-16))) );
12664 assign( rLo, dis_PALIGNR_XMM_helper(dHi, dLo, d32-16) );
12665 }
12666 else if (d32 == 24) {
12667 assign( rHi, mkU64(0) );
12668 assign( rLo, mkexpr(dHi) );
12669 }
12670 else if (d32 >= 25 && d32 <= 31) {
12671 assign( rHi, mkU64(0) );
12672 assign( rLo, binop(Iop_Shr64, mkexpr(dHi), mkU8(8*(d32-24))) );
12673 }
12674 else if (d32 >= 32 && d32 <= 255) {
12675 assign( rHi, mkU64(0) );
12676 assign( rLo, mkU64(0) );
12677 }
12678 else
12679 vassert(0);
12680
12681 putXMMReg(
12682 gregOfRM(modrm),
12683 binop(Iop_64HLtoV128, mkexpr(rHi), mkexpr(rLo))
12684 );
12685 goto decode_success;
12686 }
12687
12688 /* 0F 38 00 = PSHUFB -- Packed Shuffle Bytes 8x8 (MMX) */
12689 if (sz == 4
12690 && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x00) {
12691 IRTemp sV = newTemp(Ity_I64);
12692 IRTemp dV = newTemp(Ity_I64);
12693
12694 modrm = insn[3];
12695 do_MMX_preamble();
12696 assign( dV, getMMXReg(gregOfRM(modrm)) );
12697
12698 if (epartIsReg(modrm)) {
12699 assign( sV, getMMXReg(eregOfRM(modrm)) );
12700 delta += 3+1;
12701 DIP("pshufb %s,%s\n", nameMMXReg(eregOfRM(modrm)),
12702 nameMMXReg(gregOfRM(modrm)));
12703 } else {
12704 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
12705 assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
12706 delta += 3+alen;
12707 DIP("pshufb %s,%s\n", dis_buf,
12708 nameMMXReg(gregOfRM(modrm)));
12709 }
12710
12711 putMMXReg(
12712 gregOfRM(modrm),
12713 binop(
12714 Iop_And64,
12715 /* permute the lanes */
12716 binop(
12717 Iop_Perm8x8,
12718 mkexpr(dV),
12719 binop(Iop_And64, mkexpr(sV), mkU64(0x0707070707070707ULL))
12720 ),
12721 /* mask off lanes which have (index & 0x80) == 0x80 */
12722 unop(Iop_Not64, binop(Iop_SarN8x8, mkexpr(sV), mkU8(7)))
12723 )
12724 );
12725 goto decode_success;
12726 }
12727
12728 /* 66 0F 38 00 = PSHUFB -- Packed Shuffle Bytes 8x16 (XMM) */
12729 if (sz == 2
12730 && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x00) {
12731 IRTemp sV = newTemp(Ity_V128);
12732 IRTemp dV = newTemp(Ity_V128);
12733 IRTemp sHi = newTemp(Ity_I64);
12734 IRTemp sLo = newTemp(Ity_I64);
12735 IRTemp dHi = newTemp(Ity_I64);
12736 IRTemp dLo = newTemp(Ity_I64);
12737 IRTemp rHi = newTemp(Ity_I64);
12738 IRTemp rLo = newTemp(Ity_I64);
12739 IRTemp sevens = newTemp(Ity_I64);
12740 IRTemp mask0x80hi = newTemp(Ity_I64);
12741 IRTemp mask0x80lo = newTemp(Ity_I64);
12742 IRTemp maskBit3hi = newTemp(Ity_I64);
12743 IRTemp maskBit3lo = newTemp(Ity_I64);
12744 IRTemp sAnd7hi = newTemp(Ity_I64);
12745 IRTemp sAnd7lo = newTemp(Ity_I64);
12746 IRTemp permdHi = newTemp(Ity_I64);
12747 IRTemp permdLo = newTemp(Ity_I64);
12748
12749 modrm = insn[3];
12750 assign( dV, getXMMReg(gregOfRM(modrm)) );
12751
12752 if (epartIsReg(modrm)) {
12753 assign( sV, getXMMReg(eregOfRM(modrm)) );
12754 delta += 3+1;
12755 DIP("pshufb %s,%s\n", nameXMMReg(eregOfRM(modrm)),
12756 nameXMMReg(gregOfRM(modrm)));
12757 } else {
12758 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
12759 gen_SEGV_if_not_16_aligned( addr );
12760 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
12761 delta += 3+alen;
12762 DIP("pshufb %s,%s\n", dis_buf,
12763 nameXMMReg(gregOfRM(modrm)));
12764 }
12765
12766 assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) );
12767 assign( dLo, unop(Iop_V128to64, mkexpr(dV)) );
12768 assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
12769 assign( sLo, unop(Iop_V128to64, mkexpr(sV)) );
12770
12771 assign( sevens, mkU64(0x0707070707070707ULL) );
12772
12773 /*
12774 mask0x80hi = Not(SarN8x8(sHi,7))
12775 maskBit3hi = SarN8x8(ShlN8x8(sHi,4),7)
12776 sAnd7hi = And(sHi,sevens)
12777 permdHi = Or( And(Perm8x8(dHi,sAnd7hi),maskBit3hi),
12778 And(Perm8x8(dLo,sAnd7hi),Not(maskBit3hi)) )
12779 rHi = And(permdHi,mask0x80hi)
12780 */
12781 assign(
12782 mask0x80hi,
12783 unop(Iop_Not64, binop(Iop_SarN8x8,mkexpr(sHi),mkU8(7))));
12784
12785 assign(
12786 maskBit3hi,
12787 binop(Iop_SarN8x8,
12788 binop(Iop_ShlN8x8,mkexpr(sHi),mkU8(4)),
12789 mkU8(7)));
12790
12791 assign(sAnd7hi, binop(Iop_And64,mkexpr(sHi),mkexpr(sevens)));
12792
12793 assign(
12794 permdHi,
12795 binop(
12796 Iop_Or64,
12797 binop(Iop_And64,
12798 binop(Iop_Perm8x8,mkexpr(dHi),mkexpr(sAnd7hi)),
12799 mkexpr(maskBit3hi)),
12800 binop(Iop_And64,
12801 binop(Iop_Perm8x8,mkexpr(dLo),mkexpr(sAnd7hi)),
12802 unop(Iop_Not64,mkexpr(maskBit3hi))) ));
12803
12804 assign(rHi, binop(Iop_And64,mkexpr(permdHi),mkexpr(mask0x80hi)) );
12805
12806 /* And the same for the lower half of the result. What fun. */
12807
12808 assign(
12809 mask0x80lo,
12810 unop(Iop_Not64, binop(Iop_SarN8x8,mkexpr(sLo),mkU8(7))));
12811
12812 assign(
12813 maskBit3lo,
12814 binop(Iop_SarN8x8,
12815 binop(Iop_ShlN8x8,mkexpr(sLo),mkU8(4)),
12816 mkU8(7)));
12817
12818 assign(sAnd7lo, binop(Iop_And64,mkexpr(sLo),mkexpr(sevens)));
12819
12820 assign(
12821 permdLo,
12822 binop(
12823 Iop_Or64,
12824 binop(Iop_And64,
12825 binop(Iop_Perm8x8,mkexpr(dHi),mkexpr(sAnd7lo)),
12826 mkexpr(maskBit3lo)),
12827 binop(Iop_And64,
12828 binop(Iop_Perm8x8,mkexpr(dLo),mkexpr(sAnd7lo)),
12829 unop(Iop_Not64,mkexpr(maskBit3lo))) ));
12830
12831 assign(rLo, binop(Iop_And64,mkexpr(permdLo),mkexpr(mask0x80lo)) );
12832
12833 putXMMReg(
12834 gregOfRM(modrm),
12835 binop(Iop_64HLtoV128, mkexpr(rHi), mkexpr(rLo))
12836 );
12837 goto decode_success;
12838 }
12839
12840 /* 0F 38 F0 = MOVBE m16/32(E), r16/32(G) */
12841 /* 0F 38 F1 = MOVBE r16/32(G), m16/32(E) */
12842 if ((sz == 2 || sz == 4)
12843 && insn[0] == 0x0F && insn[1] == 0x38
12844 && (insn[2] == 0xF0 || insn[2] == 0xF1)
12845 && !epartIsReg(insn[3])) {
12846
12847 modrm = insn[3];
12848 addr = disAMode(&alen, sorb, delta + 3, dis_buf);
12849 delta += 3 + alen;
12850 ty = szToITy(sz);
12851 IRTemp src = newTemp(ty);
12852
12853 if (insn[2] == 0xF0) { /* LOAD */
12854 assign(src, loadLE(ty, mkexpr(addr)));
12855 IRTemp dst = math_BSWAP(src, ty);
12856 putIReg(sz, gregOfRM(modrm), mkexpr(dst));
12857 DIP("movbe %s,%s\n", dis_buf, nameIReg(sz, gregOfRM(modrm)));
12858 } else { /* STORE */
12859 assign(src, getIReg(sz, gregOfRM(modrm)));
12860 IRTemp dst = math_BSWAP(src, ty);
12861 storeLE(mkexpr(addr), mkexpr(dst));
12862 DIP("movbe %s,%s\n", nameIReg(sz, gregOfRM(modrm)), dis_buf);
12863 }
12864 goto decode_success;
12865 }
12866
12867 /* ---------------------------------------------------- */
12868 /* --- end of the SSSE3 decoder. --- */
12869 /* ---------------------------------------------------- */
12870
12871 /* ---------------------------------------------------- */
12872 /* --- start of the SSE4 decoder --- */
12873 /* ---------------------------------------------------- */
12874
12875 /* 66 0F 3A 0B /r ib = ROUNDSD imm8, xmm2/m64, xmm1
12876 (Partial implementation only -- only deal with cases where
12877 the rounding mode is specified directly by the immediate byte.)
12878 66 0F 3A 0A /r ib = ROUNDSS imm8, xmm2/m32, xmm1
12879 (Limitations ditto)
12880 */
12881 if (sz == 2
12882 && insn[0] == 0x0F && insn[1] == 0x3A
12883 && (/*insn[2] == 0x0B || */insn[2] == 0x0A)) {
12884
12885 Bool isD = insn[2] == 0x0B;
12886 IRTemp src = newTemp(isD ? Ity_F64 : Ity_F32);
12887 IRTemp res = newTemp(isD ? Ity_F64 : Ity_F32);
12888 Int imm = 0;
12889
12890 modrm = insn[3];
12891
12892 if (epartIsReg(modrm)) {
12893 assign( src,
12894 isD ? getXMMRegLane64F( eregOfRM(modrm), 0 )
12895 : getXMMRegLane32F( eregOfRM(modrm), 0 ) );
12896 imm = insn[3+1];
12897 if (imm & ~3) goto decode_failure;
12898 delta += 3+1+1;
12899 DIP( "rounds%c $%d,%s,%s\n",
12900 isD ? 'd' : 's',
12901 imm, nameXMMReg( eregOfRM(modrm) ),
12902 nameXMMReg( gregOfRM(modrm) ) );
12903 } else {
12904 addr = disAMode( &alen, sorb, delta+3, dis_buf );
12905 assign( src, loadLE( isD ? Ity_F64 : Ity_F32, mkexpr(addr) ));
12906 imm = insn[3+alen];
12907 if (imm & ~3) goto decode_failure;
12908 delta += 3+alen+1;
12909 DIP( "roundsd $%d,%s,%s\n",
12910 imm, dis_buf, nameXMMReg( gregOfRM(modrm) ) );
12911 }
12912
12913 /* (imm & 3) contains an Intel-encoded rounding mode. Because
12914 that encoding is the same as the encoding for IRRoundingMode,
12915 we can use that value directly in the IR as a rounding
12916 mode. */
12917 assign(res, binop(isD ? Iop_RoundF64toInt : Iop_RoundF32toInt,
12918 mkU32(imm & 3), mkexpr(src)) );
12919
12920 if (isD)
12921 putXMMRegLane64F( gregOfRM(modrm), 0, mkexpr(res) );
12922 else
12923 putXMMRegLane32F( gregOfRM(modrm), 0, mkexpr(res) );
12924
12925 goto decode_success;
12926 }
12927
12928    /* F3 0F BD -- LZCNT (count leading zeroes).  An AMD extension,
12929       which we can only decode if we're sure this is an AMD cpu that
12930       supports LZCNT, since otherwise it's BSR, which behaves
12931       differently. */
12932 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0xBD
12933 && 0 != (archinfo->hwcaps & VEX_HWCAPS_X86_LZCNT)) {
12934 vassert(sz == 2 || sz == 4);
12935 /*IRType*/ ty = szToITy(sz);
12936 IRTemp src = newTemp(ty);
12937 modrm = insn[3];
12938 if (epartIsReg(modrm)) {
12939 assign(src, getIReg(sz, eregOfRM(modrm)));
12940 delta += 3+1;
12941 DIP("lzcnt%c %s, %s\n", nameISize(sz),
12942 nameIReg(sz, eregOfRM(modrm)),
12943 nameIReg(sz, gregOfRM(modrm)));
12944 } else {
12945 addr = disAMode( &alen, sorb, delta+3, dis_buf );
12946 assign(src, loadLE(ty, mkexpr(addr)));
12947 delta += 3+alen;
12948 DIP("lzcnt%c %s, %s\n", nameISize(sz), dis_buf,
12949 nameIReg(sz, gregOfRM(modrm)));
12950 }
12951
12952 IRTemp res = gen_LZCNT(ty, src);
12953 putIReg(sz, gregOfRM(modrm), mkexpr(res));
12954
12955 // Update flags. This is pretty lame .. perhaps can do better
12956 // if this turns out to be performance critical.
12957 // O S A P are cleared. Z is set if RESULT == 0.
12958 // C is set if SRC is zero.
12959 IRTemp src32 = newTemp(Ity_I32);
12960 IRTemp res32 = newTemp(Ity_I32);
12961 assign(src32, widenUto32(mkexpr(src)));
12962 assign(res32, widenUto32(mkexpr(res)));
12963
12964 IRTemp oszacp = newTemp(Ity_I32);
12965 assign(
12966 oszacp,
12967 binop(Iop_Or32,
12968 binop(Iop_Shl32,
12969 unop(Iop_1Uto32,
12970 binop(Iop_CmpEQ32, mkexpr(res32), mkU32(0))),
12971 mkU8(X86G_CC_SHIFT_Z)),
12972 binop(Iop_Shl32,
12973 unop(Iop_1Uto32,
12974 binop(Iop_CmpEQ32, mkexpr(src32), mkU32(0))),
12975 mkU8(X86G_CC_SHIFT_C))
12976 )
12977 );
12978
12979 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(X86G_CC_OP_COPY) ));
12980 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) ));
12981 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
12982 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(oszacp) ));
12983
12984 goto decode_success;
12985 }
12986
12987 /* ---------------------------------------------------- */
12988 /* --- end of the SSE4 decoder --- */
12989 /* ---------------------------------------------------- */
12990
12991 after_sse_decoders:
12992
12993 /* ---------------------------------------------------- */
12994 /* --- deal with misc 0x67 pfxs (addr size override) -- */
12995 /* ---------------------------------------------------- */
12996
12997 /* 67 E3 = JCXZ (for JECXZ see below) */
12998 if (insn[0] == 0x67 && insn[1] == 0xE3 && sz == 4) {
12999 delta += 2;
13000 d32 = (((Addr32)guest_EIP_bbstart)+delta+1) + getSDisp8(delta);
13001 delta ++;
13002 stmt( IRStmt_Exit(
13003 binop(Iop_CmpEQ16, getIReg(2,R_ECX), mkU16(0)),
13004 Ijk_Boring,
13005 IRConst_U32(d32),
13006 OFFB_EIP
13007 ));
13008 DIP("jcxz 0x%x\n", d32);
13009 goto decode_success;
13010 }
13011
13012 /* ---------------------------------------------------- */
13013 /* --- start of the baseline insn decoder -- */
13014 /* ---------------------------------------------------- */
13015
13016 /* Get the primary opcode. */
13017 opc = getIByte(delta); delta++;
13018
13019 /* We get here if the current insn isn't SSE, or this CPU doesn't
13020 support SSE. */
13021
13022 switch (opc) {
13023
13024 /* ------------------------ Control flow --------------- */
13025
13026 case 0xC2: /* RET imm16 */
13027 d32 = getUDisp16(delta);
13028 delta += 2;
13029 dis_ret(&dres, d32);
13030 DIP("ret %d\n", (Int)d32);
13031 break;
13032 case 0xC3: /* RET */
13033 dis_ret(&dres, 0);
13034 DIP("ret\n");
13035 break;
13036
13037 case 0xCF: /* IRET */
13038 /* Note, this is an extremely kludgey and limited implementation
13039 of iret. All it really does is:
13040 popl %EIP; popl %CS; popl %EFLAGS.
13041             %CS is set but ignored (as it is in (eg) popw %cs). */
13042 t1 = newTemp(Ity_I32); /* ESP */
13043 t2 = newTemp(Ity_I32); /* new EIP */
13044 t3 = newTemp(Ity_I32); /* new CS */
13045 t4 = newTemp(Ity_I32); /* new EFLAGS */
13046 assign(t1, getIReg(4,R_ESP));
13047 assign(t2, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t1),mkU32(0) )));
13048 assign(t3, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t1),mkU32(4) )));
13049 assign(t4, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t1),mkU32(8) )));
13050 /* Get stuff off stack */
13051 putIReg(4, R_ESP,binop(Iop_Add32, mkexpr(t1), mkU32(12)));
13052 /* set %CS (which is ignored anyway) */
13053 putSReg( R_CS, unop(Iop_32to16, mkexpr(t3)) );
13054 /* set %EFLAGS */
13055 set_EFLAGS_from_value( t4, False/*!emit_AC_emwarn*/, 0/*unused*/ );
13056 /* goto new EIP value */
13057 jmp_treg(&dres, Ijk_Ret, t2);
13058 vassert(dres.whatNext == Dis_StopHere);
13059 DIP("iret (very kludgey)\n");
13060 break;
13061
   case 0xE8: /* CALL J4 */
      /* Direct near call with 32-bit relative displacement. */
      d32 = getUDisp32(delta); delta += 4;
      d32 += (guest_EIP_bbstart+delta);
      /* (guest_eip_bbstart+delta) == return-to addr, d32 == call-to addr */
      if (d32 == guest_EIP_bbstart+delta && getIByte(delta) >= 0x58
                                         && getIByte(delta) <= 0x5F) {
         /* Specially treat the position-independent-code idiom
                 call X
              X: popl %reg
            as
                 movl %eip, %reg.
            since this generates better code, but for no other reason. */
         Int archReg = getIByte(delta) - 0x58;
         /* vex_printf("-- fPIC thingy\n"); */
         putIReg(4, archReg, mkU32(guest_EIP_bbstart+delta));
         delta++; /* Step over the POP */
         DIP("call 0x%x ; popl %s\n",d32,nameIReg(4,archReg));
      } else {
         /* The normal sequence for a call: push the return address,
            then either chase into the target (if the callback allows
            it) or end the block with a Call jump. */
         t1 = newTemp(Ity_I32);
         assign(t1, binop(Iop_Sub32, getIReg(4,R_ESP), mkU32(4)));
         putIReg(4, R_ESP, mkexpr(t1));
         storeLE( mkexpr(t1), mkU32(guest_EIP_bbstart+delta));
         if (resteerOkFn( callback_opaque, (Addr64)(Addr32)d32 )) {
            /* follow into the call target. */
            dres.whatNext   = Dis_ResteerU;
            dres.continueAt = (Addr64)(Addr32)d32;
         } else {
            jmp_lit(&dres, Ijk_Call, d32);
            vassert(dres.whatNext == Dis_StopHere);
         }
         DIP("call 0x%x\n",d32);
      }
      break;
13096
13097 //-- case 0xC8: /* ENTER */
13098 //-- d32 = getUDisp16(eip); eip += 2;
13099 //-- abyte = getIByte(delta); delta++;
13100 //--
13101 //-- vg_assert(sz == 4);
13102 //-- vg_assert(abyte == 0);
13103 //--
13104 //-- t1 = newTemp(cb); t2 = newTemp(cb);
13105 //-- uInstr2(cb, GET, sz, ArchReg, R_EBP, TempReg, t1);
13106 //-- uInstr2(cb, GET, 4, ArchReg, R_ESP, TempReg, t2);
13107 //-- uInstr2(cb, SUB, 4, Literal, 0, TempReg, t2);
13108 //-- uLiteral(cb, sz);
13109 //-- uInstr2(cb, PUT, 4, TempReg, t2, ArchReg, R_ESP);
13110 //-- uInstr2(cb, STORE, 4, TempReg, t1, TempReg, t2);
13111 //-- uInstr2(cb, PUT, 4, TempReg, t2, ArchReg, R_EBP);
13112 //-- if (d32) {
13113 //-- uInstr2(cb, SUB, 4, Literal, 0, TempReg, t2);
13114 //-- uLiteral(cb, d32);
13115 //-- uInstr2(cb, PUT, 4, TempReg, t2, ArchReg, R_ESP);
13116 //-- }
13117 //-- DIP("enter 0x%x, 0x%x", d32, abyte);
13118 //-- break;
13119
   case 0xC9: /* LEAVE */
      /* ESP := EBP; EBP := pop().  Undoes a standard frame setup. */
      vassert(sz == 4);
      t1 = newTemp(Ity_I32); t2 = newTemp(Ity_I32);
      assign(t1, getIReg(4,R_EBP));
      /* First PUT ESP looks redundant, but need it because ESP must
         always be up-to-date for Memcheck to work... */
      putIReg(4, R_ESP, mkexpr(t1));
      assign(t2, loadLE(Ity_I32,mkexpr(t1)));
      putIReg(4, R_EBP, mkexpr(t2));
      putIReg(4, R_ESP, binop(Iop_Add32, mkexpr(t1), mkU32(4)) );
      DIP("leave\n");
      break;
13132
13133 /* ---------------- Misc weird-ass insns --------------- */
13134
   case 0x27: /* DAA */
   case 0x2F: /* DAS */
   case 0x37: /* AAA */
   case 0x3F: /* AAS */
      /* An ugly implementation for some ugly instructions.  Oh
         well. */
      if (sz != 4) goto decode_failure;
      t1 = newTemp(Ity_I32);
      t2 = newTemp(Ity_I32);
      /* Make up a 32-bit value (t1), with the old value of AX in the
         bottom 16 bits, and the old OSZACP bitmask in the upper 16
         bits. */
      assign(t1,
             binop(Iop_16HLto32,
                   unop(Iop_32to16,
                        mk_x86g_calculate_eflags_all()),
                   getIReg(2, R_EAX)
            ));
      /* Call the helper fn, to get a new AX and OSZACP value, and
         poke both back into the guest state.  Also pass the helper
         the actual opcode so it knows which of the 4 instructions it
         is doing the computation for. */
      vassert(opc == 0x27 || opc == 0x2F || opc == 0x37 || opc == 0x3F);
      assign(t2,
              mkIRExprCCall(
                 Ity_I32, 0/*regparm*/, "x86g_calculate_daa_das_aaa_aas",
                 &x86g_calculate_daa_das_aaa_aas,
                 mkIRExprVec_2( mkexpr(t1), mkU32( opc & 0xFF) )
            ));
      putIReg(2, R_EAX, unop(Iop_32to16, mkexpr(t2) ));

      /* Flags come back in the top 16 bits of t2; install them via
         the COPY thunk. */
      stmt( IRStmt_Put( OFFB_CC_OP,   mkU32(X86G_CC_OP_COPY) ));
      stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) ));
      stmt( IRStmt_Put( OFFB_CC_DEP1,
                        binop(Iop_And32,
                              binop(Iop_Shr32, mkexpr(t2), mkU8(16)),
                              mkU32( X86G_CC_MASK_C | X86G_CC_MASK_P
                                     | X86G_CC_MASK_A | X86G_CC_MASK_Z
                                     | X86G_CC_MASK_S| X86G_CC_MASK_O )
                             )
                       )
          );
      /* Set NDEP even though it isn't used.  This makes redundant-PUT
         elimination of previous stores to this field work better. */
      stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
      switch (opc) {
         case 0x27: DIP("daa\n"); break;
         case 0x2F: DIP("das\n"); break;
         case 0x37: DIP("aaa\n"); break;
         case 0x3F: DIP("aas\n"); break;
         default: vassert(0);
      }
      break;
13188
   case 0xD4: /* AAM */
   case 0xD5: /* AAD */
      /* Only the standard base-10 forms are accepted; any other
         immediate base fails to decode. */
      d32 = getIByte(delta); delta++;
      if (sz != 4 || d32 != 10) goto decode_failure;
      t1 = newTemp(Ity_I32);
      t2 = newTemp(Ity_I32);
      /* Make up a 32-bit value (t1), with the old value of AX in the
         bottom 16 bits, and the old OSZACP bitmask in the upper 16
         bits. */
      assign(t1,
             binop(Iop_16HLto32,
                   unop(Iop_32to16,
                        mk_x86g_calculate_eflags_all()),
                   getIReg(2, R_EAX)
            ));
      /* Call the helper fn, to get a new AX and OSZACP value, and
         poke both back into the guest state.  Also pass the helper
         the actual opcode so it knows which of the 2 instructions it
         is doing the computation for. */
      assign(t2,
              mkIRExprCCall(
                 Ity_I32, 0/*regparm*/, "x86g_calculate_aad_aam",
                 &x86g_calculate_aad_aam,
                 mkIRExprVec_2( mkexpr(t1), mkU32( opc & 0xFF) )
            ));
      putIReg(2, R_EAX, unop(Iop_32to16, mkexpr(t2) ));

      /* Flags come back in the top 16 bits of t2; install them via
         the COPY thunk. */
      stmt( IRStmt_Put( OFFB_CC_OP,   mkU32(X86G_CC_OP_COPY) ));
      stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) ));
      stmt( IRStmt_Put( OFFB_CC_DEP1,
                        binop(Iop_And32,
                              binop(Iop_Shr32, mkexpr(t2), mkU8(16)),
                              mkU32( X86G_CC_MASK_C | X86G_CC_MASK_P
                                     | X86G_CC_MASK_A | X86G_CC_MASK_Z
                                     | X86G_CC_MASK_S| X86G_CC_MASK_O )
                             )
                       )
          );
      /* Set NDEP even though it isn't used.  This makes
         redundant-PUT elimination of previous stores to this field
         work better. */
      stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));

      DIP(opc == 0xD4 ? "aam\n" : "aad\n");
      break;
13234
13235 /* ------------------------ CWD/CDQ -------------------- */
13236
   case 0x98: /* CBW */
      /* Sign-extend within eAX: AX -> EAX (CWDE) when sz==4,
         AL -> AX (CBW) when sz==2. */
      if (sz == 4) {
         putIReg(4, R_EAX, unop(Iop_16Sto32, getIReg(2, R_EAX)));
         DIP("cwde\n");
      } else {
         vassert(sz == 2);
         putIReg(2, R_EAX, unop(Iop_8Sto16, getIReg(1, R_EAX)));
         DIP("cbw\n");
      }
      break;
13247
13248 case 0x99: /* CWD/CDQ */
13249 ty = szToITy(sz);
13250 putIReg(sz, R_EDX,
13251 binop(mkSizedOp(ty,Iop_Sar8),
13252 getIReg(sz, R_EAX),
13253 mkU8(sz == 2 ? 15 : 31)) );
13254 DIP(sz == 2 ? "cwdq\n" : "cdqq\n");
13255 break;
13256
13257 /* ------------------------ FPU ops -------------------- */
13258
   case 0x9E: /* SAHF */
      /* Copy AH into the low byte of EFLAGS (via helper). */
      codegen_SAHF();
      DIP("sahf\n");
      break;

   case 0x9F: /* LAHF */
      /* Copy the low byte of EFLAGS into AH (via helper). */
      codegen_LAHF();
      DIP("lahf\n");
      break;

   case 0x9B: /* FWAIT */
      /* ignore?  No FP exceptions are modelled, so there is nothing
         to wait for. */
      DIP("fwait\n");
      break;
13273
   /* All x87 FPU opcodes (0xD8..0xDF) are handled by dis_FPU; on
      decode failure, rewind delta so the caller sees the original
      position. */
   case 0xD8:
   case 0xD9:
   case 0xDA:
   case 0xDB:
   case 0xDC:
   case 0xDD:
   case 0xDE:
   case 0xDF: {
      Int  delta0    = delta;
      Bool decode_OK = False;
      delta = dis_FPU ( &decode_OK, sorb, delta );
      if (!decode_OK) {
         delta = delta0;
         goto decode_failure;
      }
      break;
   }
13291
13292 /* ------------------------ INC & DEC ------------------ */
13293
   case 0x40: /* INC eAX */
   case 0x41: /* INC eCX */
   case 0x42: /* INC eDX */
   case 0x43: /* INC eBX */
   case 0x44: /* INC eSP */
   case 0x45: /* INC eBP */
   case 0x46: /* INC eSI */
   case 0x47: /* INC eDI */
      /* Register index is encoded in the opcode (opc - 0x40). */
      vassert(sz == 2 || sz == 4);
      ty = szToITy(sz);
      t1 = newTemp(ty);
      assign( t1, binop(mkSizedOp(ty,Iop_Add8),
                        getIReg(sz, (UInt)(opc - 0x40)),
                        mkU(ty,1)) );
      setFlags_INC_DEC( True, t1, ty );
      putIReg(sz, (UInt)(opc - 0x40), mkexpr(t1));
      DIP("inc%c %s\n", nameISize(sz), nameIReg(sz,opc-0x40));
      break;

   case 0x48: /* DEC eAX */
   case 0x49: /* DEC eCX */
   case 0x4A: /* DEC eDX */
   case 0x4B: /* DEC eBX */
   case 0x4C: /* DEC eSP */
   case 0x4D: /* DEC eBP */
   case 0x4E: /* DEC eSI */
   case 0x4F: /* DEC eDI */
      /* Register index is encoded in the opcode (opc - 0x48). */
      vassert(sz == 2 || sz == 4);
      ty = szToITy(sz);
      t1 = newTemp(ty);
      assign( t1, binop(mkSizedOp(ty,Iop_Sub8),
                        getIReg(sz, (UInt)(opc - 0x48)),
                        mkU(ty,1)) );
      setFlags_INC_DEC( False, t1, ty );
      putIReg(sz, (UInt)(opc - 0x48), mkexpr(t1));
      DIP("dec%c %s\n", nameISize(sz), nameIReg(sz,opc-0x48));
      break;
13331
13332 /* ------------------------ INT ------------------------ */
13333
   case 0xCC: /* INT 3 */
      /* Deliver SIGTRAP, resuming at the following instruction. */
      jmp_lit(&dres, Ijk_SigTRAP, ((Addr32)guest_EIP_bbstart)+delta);
      vassert(dres.whatNext == Dis_StopHere);
      DIP("int $0x3\n");
      break;

   case 0xCD: /* INT imm8 */
      d32 = getIByte(delta); delta++;

      /* For any of the cases where we emit a jump (that is, for all
         currently handled cases), it's important that all ArchRegs
         carry their up-to-date value at this point.  So we declare an
         end-of-block here, which forces any TempRegs caching ArchRegs
         to be flushed. */

      /* Handle int $0x3F .. $0x4F by synthesising a segfault and a
         restart of this instruction (hence the "-2" two lines below,
         to get the restart EIP to be this instruction.  This is
         probably Linux-specific and it would be more correct to only
         do this if the VexAbiInfo says that is what we should do.
         This used to handle just 0x40-0x43; Jikes RVM uses a larger
         range (0x3F-0x49), and this allows some slack as well. */
      if (d32 >= 0x3F && d32 <= 0x4F) {
         jmp_lit(&dres, Ijk_SigSEGV, ((Addr32)guest_EIP_bbstart)+delta-2);
         vassert(dres.whatNext == Dis_StopHere);
         DIP("int $0x%x\n", (Int)d32);
         break;
      }

      /* Handle int $0x80 (linux syscalls), int $0x81 and $0x82
         (darwin syscalls).  As part of this, note where we are, so we
         can back up the guest to this point if the syscall needs to
         be restarted. */
      if (d32 == 0x80) {
         stmt( IRStmt_Put( OFFB_IP_AT_SYSCALL,
                           mkU32(guest_EIP_curr_instr) ) );
         jmp_lit(&dres, Ijk_Sys_int128, ((Addr32)guest_EIP_bbstart)+delta);
         vassert(dres.whatNext == Dis_StopHere);
         DIP("int $0x80\n");
         break;
      }
      if (d32 == 0x81) {
         stmt( IRStmt_Put( OFFB_IP_AT_SYSCALL,
                           mkU32(guest_EIP_curr_instr) ) );
         jmp_lit(&dres, Ijk_Sys_int129, ((Addr32)guest_EIP_bbstart)+delta);
         vassert(dres.whatNext == Dis_StopHere);
         DIP("int $0x81\n");
         break;
      }
      if (d32 == 0x82) {
         stmt( IRStmt_Put( OFFB_IP_AT_SYSCALL,
                           mkU32(guest_EIP_curr_instr) ) );
         jmp_lit(&dres, Ijk_Sys_int130, ((Addr32)guest_EIP_bbstart)+delta);
         vassert(dres.whatNext == Dis_StopHere);
         DIP("int $0x82\n");
         break;
      }

      /* none of the above */
      goto decode_failure;
13394
13395 /* ------------------------ Jcond, byte offset --------- */
13396
   case 0xEB: /* Jb (jump, byte offset) */
      /* Unconditional short jump; chase into the target if the
         callback allows it, else end the block. */
      d32 = (((Addr32)guest_EIP_bbstart)+delta+1) + getSDisp8(delta);
      delta++;
      if (resteerOkFn( callback_opaque, (Addr64)(Addr32)d32) ) {
         dres.whatNext   = Dis_ResteerU;
         dres.continueAt = (Addr64)(Addr32)d32;
      } else {
         jmp_lit(&dres, Ijk_Boring, d32);
         vassert(dres.whatNext == Dis_StopHere);
      }
      DIP("jmp-8 0x%x\n", d32);
      break;

   case 0xE9: /* Jv (jump, 16/32 offset) */
      /* Unconditional near jump with full-size displacement. */
      vassert(sz == 4); /* JRS added 2004 July 11 */
      d32 = (((Addr32)guest_EIP_bbstart)+delta+sz) + getSDisp(sz,delta);
      delta += sz;
      if (resteerOkFn( callback_opaque, (Addr64)(Addr32)d32) ) {
         dres.whatNext   = Dis_ResteerU;
         dres.continueAt = (Addr64)(Addr32)d32;
      } else {
         jmp_lit(&dres, Ijk_Boring, d32);
         vassert(dres.whatNext == Dis_StopHere);
      }
      DIP("jmp 0x%x\n", d32);
      break;
13423
   case 0x70:
   case 0x71:
   case 0x72: /* JBb/JNAEb (jump below) */
   case 0x73: /* JNBb/JAEb (jump not below) */
   case 0x74: /* JZb/JEb (jump zero) */
   case 0x75: /* JNZb/JNEb (jump not zero) */
   case 0x76: /* JBEb/JNAb (jump below or equal) */
   case 0x77: /* JNBEb/JAb (jump not below or equal) */
   case 0x78: /* JSb (jump negative) */
   case 0x79: /* JNSb (jump not negative) */
   case 0x7A: /* JP (jump parity even) */
   case 0x7B: /* JNP/JPO (jump parity odd) */
   case 0x7C: /* JLb/JNGEb (jump less) */
   case 0x7D: /* JGEb/JNLb (jump greater or equal) */
   case 0x7E: /* JLEb/JNGb (jump less or equal) */
   case 0x7F: /* JGb/JNLEb (jump greater) */
      /* Condition code is (opc - 0x70); XORing with 1 negates it. */
    { Int jmpDelta;
      const HChar* comment  = "";
      jmpDelta = (Int)getSDisp8(delta);
      vassert(-128 <= jmpDelta && jmpDelta < 128);
      d32 = (((Addr32)guest_EIP_bbstart)+delta+1) + jmpDelta;
      delta++;
      if (resteerCisOk
          && vex_control.guest_chase_cond
          && (Addr32)d32 != (Addr32)guest_EIP_bbstart
          && jmpDelta < 0
          && resteerOkFn( callback_opaque, (Addr64)(Addr32)d32) ) {
         /* Speculation: assume this backward branch is taken.  So we
            need to emit a side-exit to the insn following this one,
            on the negation of the condition, and continue at the
            branch target address (d32).  If we wind up back at the
            first instruction of the trace, just stop; it's better to
            let the IR loop unroller handle that case. */
         stmt( IRStmt_Exit(
                  mk_x86g_calculate_condition((X86Condcode)(1 ^ (opc - 0x70))),
                  Ijk_Boring,
                  IRConst_U32(guest_EIP_bbstart+delta),
                  OFFB_EIP ) );
         dres.whatNext   = Dis_ResteerC;
         dres.continueAt = (Addr64)(Addr32)d32;
         comment = "(assumed taken)";
      }
      else
      if (resteerCisOk
          && vex_control.guest_chase_cond
          && (Addr32)d32 != (Addr32)guest_EIP_bbstart
          && jmpDelta >= 0
          && resteerOkFn( callback_opaque,
                          (Addr64)(Addr32)(guest_EIP_bbstart+delta)) ) {
         /* Speculation: assume this forward branch is not taken.  So
            we need to emit a side-exit to d32 (the dest) and continue
            disassembling at the insn immediately following this
            one. */
         stmt( IRStmt_Exit(
                  mk_x86g_calculate_condition((X86Condcode)(opc - 0x70)),
                  Ijk_Boring,
                  IRConst_U32(d32),
                  OFFB_EIP ) );
         dres.whatNext   = Dis_ResteerC;
         dres.continueAt = (Addr64)(Addr32)(guest_EIP_bbstart+delta);
         comment = "(assumed not taken)";
      }
      else {
         /* Conservative default translation - end the block at this
            point. */
         jcc_01( &dres, (X86Condcode)(opc - 0x70),
                 (Addr32)(guest_EIP_bbstart+delta), d32);
         vassert(dres.whatNext == Dis_StopHere);
      }
      DIP("j%s-8 0x%x %s\n", name_X86Condcode(opc - 0x70), d32, comment);
      break;
    }
13496
   case 0xE3: /* JECXZ (for JCXZ see above) */
      /* Side-exit to the target when ECX == 0; otherwise fall
         through to the next instruction in this block. */
      if (sz != 4) goto decode_failure;
      d32 = (((Addr32)guest_EIP_bbstart)+delta+1) + getSDisp8(delta);
      delta ++;
      stmt( IRStmt_Exit(
               binop(Iop_CmpEQ32, getIReg(4,R_ECX), mkU32(0)),
               Ijk_Boring,
               IRConst_U32(d32),
               OFFB_EIP
            ));
      DIP("jecxz 0x%x\n", d32);
      break;
13509
   case 0xE0: /* LOOPNE disp8: decrement count, jump if count != 0 && ZF==0 */
   case 0xE1: /* LOOPE  disp8: decrement count, jump if count != 0 && ZF==1 */
   case 0xE2: /* LOOP   disp8: decrement count, jump if count != 0 */
    { /* Again, the docs say this uses ECX/CX as a count depending on
         the address size override, not the operand one.  Since we
         don't handle address size overrides, I guess that means
         ECX. */
      IRExpr* zbit  = NULL;
      IRExpr* count = NULL;
      IRExpr* cond  = NULL;
      const HChar* xtra = NULL;

      if (sz != 4) goto decode_failure;
      d32 = (((Addr32)guest_EIP_bbstart)+delta+1) + getSDisp8(delta);
      delta++;
      /* ECX is decremented first; the count test reads the
         post-decrement value, per the architecture. */
      putIReg(4, R_ECX, binop(Iop_Sub32, getIReg(4,R_ECX), mkU32(1)));

      count = getIReg(4,R_ECX);
      cond = binop(Iop_CmpNE32, count, mkU32(0));
      switch (opc) {
         case 0xE2:
            xtra = "";
            break;
         case 0xE1:
            xtra = "e";
            zbit = mk_x86g_calculate_condition( X86CondZ );
            cond = mkAnd1(cond, zbit);
            break;
         case 0xE0:
            xtra = "ne";
            zbit = mk_x86g_calculate_condition( X86CondNZ );
            cond = mkAnd1(cond, zbit);
            break;
         default:
            vassert(0);
      }
      stmt( IRStmt_Exit(cond, Ijk_Boring, IRConst_U32(d32), OFFB_EIP) );

      DIP("loop%s 0x%x\n", xtra, d32);
      break;
    }
13551
13552 /* ------------------------ IMUL ----------------------- */
13553
   case 0x69: /* IMUL Iv, Ev, Gv */
      /* Three-operand IMUL with full-size immediate. */
      delta = dis_imul_I_E_G ( sorb, sz, delta, sz );
      break;
   case 0x6B: /* IMUL Ib, Ev, Gv */
      /* Three-operand IMUL with sign-extended byte immediate. */
      delta = dis_imul_I_E_G ( sorb, sz, delta, 1 );
      break;
13560
13561 /* ------------------------ MOV ------------------------ */
13562
   case 0x88: /* MOV Gb,Eb */
      delta = dis_mov_G_E(sorb, 1, delta);
      break;

   case 0x89: /* MOV Gv,Ev */
      delta = dis_mov_G_E(sorb, sz, delta);
      break;

   case 0x8A: /* MOV Eb,Gb */
      delta = dis_mov_E_G(sorb, 1, delta);
      break;

   case 0x8B: /* MOV Ev,Gv */
      delta = dis_mov_E_G(sorb, sz, delta);
      break;

   case 0x8D: /* LEA M,Gv */
      /* Computes the effective address only; no memory access, and
         register forms of the modrm are invalid. */
      if (sz != 4)
         goto decode_failure;
      modrm = getIByte(delta);
      if (epartIsReg(modrm))
         goto decode_failure;
      /* NOTE!  this is the one place where a segment override prefix
         has no effect on the address calculation.  Therefore we pass
         zero instead of sorb here. */
      addr = disAMode ( &alen, /*sorb*/ 0, delta, dis_buf );
      delta += alen;
      putIReg(sz, gregOfRM(modrm), mkexpr(addr));
      DIP("lea%c %s, %s\n", nameISize(sz), dis_buf,
                            nameIReg(sz,gregOfRM(modrm)));
      break;

   case 0x8C: /* MOV Sw,Ew -- MOV from a SEGMENT REGISTER */
      delta = dis_mov_Sw_Ew(sorb, sz, delta);
      break;

   case 0x8E: /* MOV Ew,Sw -- MOV to a SEGMENT REGISTER */
      delta = dis_mov_Ew_Sw(sorb, delta);
      break;
13602
   case 0xA0: /* MOV Ob,AL */
      sz = 1;
      /* Fall through ... */
   case 0xA1: /* MOV Ov,eAX */
      /* Load AL/AX/EAX from an absolute (moffs) address, subject to
         any segment override. */
      d32 = getUDisp32(delta); delta += 4;
      ty = szToITy(sz);
      addr = newTemp(Ity_I32);
      assign( addr, handleSegOverride(sorb, mkU32(d32)) );
      putIReg(sz, R_EAX, loadLE(ty, mkexpr(addr)));
      DIP("mov%c %s0x%x, %s\n", nameISize(sz), sorbTxt(sorb),
                                d32, nameIReg(sz,R_EAX));
      break;

   case 0xA2: /* MOV AL,Ob -- store AL to an absolute address */
      sz = 1;
      /* Fall through ... */
   case 0xA3: /* MOV eAX,Ov */
      /* Store AL/AX/EAX to an absolute (moffs) address, subject to
         any segment override. */
      d32 = getUDisp32(delta); delta += 4;
      ty = szToITy(sz);
      addr = newTemp(Ity_I32);
      assign( addr, handleSegOverride(sorb, mkU32(d32)) );
      storeLE( mkexpr(addr), getIReg(sz,R_EAX) );
      DIP("mov%c %s, %s0x%x\n", nameISize(sz), nameIReg(sz,R_EAX),
                                sorbTxt(sorb), d32);
      break;

   case 0xB0: /* MOV imm,AL */
   case 0xB1: /* MOV imm,CL */
   case 0xB2: /* MOV imm,DL */
   case 0xB3: /* MOV imm,BL */
   case 0xB4: /* MOV imm,AH */
   case 0xB5: /* MOV imm,CH */
   case 0xB6: /* MOV imm,DH */
   case 0xB7: /* MOV imm,BH */
      /* Byte register index is encoded in the opcode (opc - 0xB0). */
      d32 = getIByte(delta); delta += 1;
      putIReg(1, opc-0xB0, mkU8(d32));
      DIP("movb $0x%x,%s\n", d32, nameIReg(1,opc-0xB0));
      break;

   case 0xB8: /* MOV imm,eAX */
   case 0xB9: /* MOV imm,eCX */
   case 0xBA: /* MOV imm,eDX */
   case 0xBB: /* MOV imm,eBX */
   case 0xBC: /* MOV imm,eSP */
   case 0xBD: /* MOV imm,eBP */
   case 0xBE: /* MOV imm,eSI */
   case 0xBF: /* MOV imm,eDI */
      /* Register index is encoded in the opcode (opc - 0xB8). */
      d32 = getUDisp(sz,delta); delta += sz;
      putIReg(sz, opc-0xB8, mkU(szToITy(sz), d32));
      DIP("mov%c $0x%x,%s\n", nameISize(sz), d32, nameIReg(sz,opc-0xB8));
      break;

   case 0xC6: /* C6 /0 = MOV Ib,Eb */
      sz = 1;
      goto maybe_do_Mov_I_E;
   case 0xC7: /* C7 /0 = MOV Iv,Ev */
      goto maybe_do_Mov_I_E;

   maybe_do_Mov_I_E:
      /* Only the /0 reg field is MOV; any other value of gregOfRM
         fails to decode. */
      modrm = getIByte(delta);
      if (gregOfRM(modrm) == 0) {
         if (epartIsReg(modrm)) {
            delta++; /* mod/rm byte */
            d32 = getUDisp(sz,delta); delta += sz;
            putIReg(sz, eregOfRM(modrm), mkU(szToITy(sz), d32));
            DIP("mov%c $0x%x, %s\n", nameISize(sz), d32,
                                     nameIReg(sz,eregOfRM(modrm)));
         } else {
            addr = disAMode ( &alen, sorb, delta, dis_buf );
            delta += alen;
            d32 = getUDisp(sz,delta); delta += sz;
            storeLE(mkexpr(addr), mkU(szToITy(sz), d32));
            DIP("mov%c $0x%x, %s\n", nameISize(sz), d32, dis_buf);
         }
         break;
      }
      goto decode_failure;
13680
13681 /* ------------------------ opl imm, A ----------------- */
13682
   /* ALU ops with an immediate and the accumulator.  Args to
      dis_op_imm_A: size, carry-in?, base 8-bit IROp, keep-result?,
      delta, mnemonic.  CMP and TEST discard the result (False). */
   case 0x04: /* ADD Ib, AL */
      delta = dis_op_imm_A( 1, False, Iop_Add8, True, delta, "add" );
      break;
   case 0x05: /* ADD Iv, eAX */
      delta = dis_op_imm_A( sz, False, Iop_Add8, True, delta, "add" );
      break;

   case 0x0C: /* OR Ib, AL */
      delta = dis_op_imm_A( 1, False, Iop_Or8, True, delta, "or" );
      break;
   case 0x0D: /* OR Iv, eAX */
      delta = dis_op_imm_A( sz, False, Iop_Or8, True, delta, "or" );
      break;

   case 0x14: /* ADC Ib, AL */
      delta = dis_op_imm_A( 1, True, Iop_Add8, True, delta, "adc" );
      break;
   case 0x15: /* ADC Iv, eAX */
      delta = dis_op_imm_A( sz, True, Iop_Add8, True, delta, "adc" );
      break;

   case 0x1C: /* SBB Ib, AL */
      delta = dis_op_imm_A( 1, True, Iop_Sub8, True, delta, "sbb" );
      break;
   case 0x1D: /* SBB Iv, eAX */
      delta = dis_op_imm_A( sz, True, Iop_Sub8, True, delta, "sbb" );
      break;

   case 0x24: /* AND Ib, AL */
      delta = dis_op_imm_A( 1, False, Iop_And8, True, delta, "and" );
      break;
   case 0x25: /* AND Iv, eAX */
      delta = dis_op_imm_A( sz, False, Iop_And8, True, delta, "and" );
      break;

   case 0x2C: /* SUB Ib, AL */
      delta = dis_op_imm_A( 1, False, Iop_Sub8, True, delta, "sub" );
      break;
   case 0x2D: /* SUB Iv, eAX */
      delta = dis_op_imm_A( sz, False, Iop_Sub8, True, delta, "sub" );
      break;

   case 0x34: /* XOR Ib, AL */
      delta = dis_op_imm_A( 1, False, Iop_Xor8, True, delta, "xor" );
      break;
   case 0x35: /* XOR Iv, eAX */
      delta = dis_op_imm_A( sz, False, Iop_Xor8, True, delta, "xor" );
      break;

   case 0x3C: /* CMP Ib, AL */
      delta = dis_op_imm_A( 1, False, Iop_Sub8, False, delta, "cmp" );
      break;
   case 0x3D: /* CMP Iv, eAX */
      delta = dis_op_imm_A( sz, False, Iop_Sub8, False, delta, "cmp" );
      break;

   case 0xA8: /* TEST Ib, AL */
      delta = dis_op_imm_A( 1, False, Iop_And8, False, delta, "test" );
      break;
   case 0xA9: /* TEST Iv, eAX */
      delta = dis_op_imm_A( sz, False, Iop_And8, False, delta, "test" );
      break;
13745
13746 /* ------------------------ opl Ev, Gv ----------------- */
13747
   /* ALU ops, E (reg-or-mem) source, G (reg) destination.  Args to
      dis_op2_E_G: sorb, carry-in?, base 8-bit IROp, keep-result?,
      size, delta, mnemonic.  CMP and TEST discard the result. */
   case 0x02: /* ADD Eb,Gb */
      delta = dis_op2_E_G ( sorb, False, Iop_Add8, True, 1, delta, "add" );
      break;
   case 0x03: /* ADD Ev,Gv */
      delta = dis_op2_E_G ( sorb, False, Iop_Add8, True, sz, delta, "add" );
      break;

   case 0x0A: /* OR Eb,Gb */
      delta = dis_op2_E_G ( sorb, False, Iop_Or8, True, 1, delta, "or" );
      break;
   case 0x0B: /* OR Ev,Gv */
      delta = dis_op2_E_G ( sorb, False, Iop_Or8, True, sz, delta, "or" );
      break;

   case 0x12: /* ADC Eb,Gb */
      delta = dis_op2_E_G ( sorb, True, Iop_Add8, True, 1, delta, "adc" );
      break;
   case 0x13: /* ADC Ev,Gv */
      delta = dis_op2_E_G ( sorb, True, Iop_Add8, True, sz, delta, "adc" );
      break;

   case 0x1A: /* SBB Eb,Gb */
      delta = dis_op2_E_G ( sorb, True, Iop_Sub8, True, 1, delta, "sbb" );
      break;
   case 0x1B: /* SBB Ev,Gv */
      delta = dis_op2_E_G ( sorb, True, Iop_Sub8, True, sz, delta, "sbb" );
      break;

   case 0x22: /* AND Eb,Gb */
      delta = dis_op2_E_G ( sorb, False, Iop_And8, True, 1, delta, "and" );
      break;
   case 0x23: /* AND Ev,Gv */
      delta = dis_op2_E_G ( sorb, False, Iop_And8, True, sz, delta, "and" );
      break;

   case 0x2A: /* SUB Eb,Gb */
      delta = dis_op2_E_G ( sorb, False, Iop_Sub8, True, 1, delta, "sub" );
      break;
   case 0x2B: /* SUB Ev,Gv */
      delta = dis_op2_E_G ( sorb, False, Iop_Sub8, True, sz, delta, "sub" );
      break;

   case 0x32: /* XOR Eb,Gb */
      delta = dis_op2_E_G ( sorb, False, Iop_Xor8, True, 1, delta, "xor" );
      break;
   case 0x33: /* XOR Ev,Gv */
      delta = dis_op2_E_G ( sorb, False, Iop_Xor8, True, sz, delta, "xor" );
      break;

   case 0x3A: /* CMP Eb,Gb */
      delta = dis_op2_E_G ( sorb, False, Iop_Sub8, False, 1, delta, "cmp" );
      break;
   case 0x3B: /* CMP Ev,Gv */
      delta = dis_op2_E_G ( sorb, False, Iop_Sub8, False, sz, delta, "cmp" );
      break;

   case 0x84: /* TEST Eb,Gb */
      delta = dis_op2_E_G ( sorb, False, Iop_And8, False, 1, delta, "test" );
      break;
   case 0x85: /* TEST Ev,Gv */
      delta = dis_op2_E_G ( sorb, False, Iop_And8, False, sz, delta, "test" );
      break;
13810
13811 /* ------------------------ opl Gv, Ev ----------------- */
13812
   /* ALU ops, G (reg) source, E (reg-or-mem) destination.  These
      forms can carry a LOCK prefix, hence the extra pfx_lock arg to
      dis_op2_G_E; remaining args mirror dis_op2_E_G. */
   case 0x00: /* ADD Gb,Eb */
      delta = dis_op2_G_E ( sorb, pfx_lock, False,
                            Iop_Add8, True, 1, delta, "add" );
      break;
   case 0x01: /* ADD Gv,Ev */
      delta = dis_op2_G_E ( sorb, pfx_lock, False,
                            Iop_Add8, True, sz, delta, "add" );
      break;

   case 0x08: /* OR Gb,Eb */
      delta = dis_op2_G_E ( sorb, pfx_lock, False,
                            Iop_Or8, True, 1, delta, "or" );
      break;
   case 0x09: /* OR Gv,Ev */
      delta = dis_op2_G_E ( sorb, pfx_lock, False,
                            Iop_Or8, True, sz, delta, "or" );
      break;

   case 0x10: /* ADC Gb,Eb */
      delta = dis_op2_G_E ( sorb, pfx_lock, True,
                            Iop_Add8, True, 1, delta, "adc" );
      break;
   case 0x11: /* ADC Gv,Ev */
      delta = dis_op2_G_E ( sorb, pfx_lock, True,
                            Iop_Add8, True, sz, delta, "adc" );
      break;

   case 0x18: /* SBB Gb,Eb */
      delta = dis_op2_G_E ( sorb, pfx_lock, True,
                            Iop_Sub8, True, 1, delta, "sbb" );
      break;
   case 0x19: /* SBB Gv,Ev */
      delta = dis_op2_G_E ( sorb, pfx_lock, True,
                            Iop_Sub8, True, sz, delta, "sbb" );
      break;

   case 0x20: /* AND Gb,Eb */
      delta = dis_op2_G_E ( sorb, pfx_lock, False,
                            Iop_And8, True, 1, delta, "and" );
      break;
   case 0x21: /* AND Gv,Ev */
      delta = dis_op2_G_E ( sorb, pfx_lock, False,
                            Iop_And8, True, sz, delta, "and" );
      break;

   case 0x28: /* SUB Gb,Eb */
      delta = dis_op2_G_E ( sorb, pfx_lock, False,
                            Iop_Sub8, True, 1, delta, "sub" );
      break;
   case 0x29: /* SUB Gv,Ev */
      delta = dis_op2_G_E ( sorb, pfx_lock, False,
                            Iop_Sub8, True, sz, delta, "sub" );
      break;

   case 0x30: /* XOR Gb,Eb */
      delta = dis_op2_G_E ( sorb, pfx_lock, False,
                            Iop_Xor8, True, 1, delta, "xor" );
      break;
   case 0x31: /* XOR Gv,Ev */
      delta = dis_op2_G_E ( sorb, pfx_lock, False,
                            Iop_Xor8, True, sz, delta, "xor" );
      break;

   case 0x38: /* CMP Gb,Eb */
      delta = dis_op2_G_E ( sorb, pfx_lock, False,
                            Iop_Sub8, False, 1, delta, "cmp" );
      break;
   case 0x39: /* CMP Gv,Ev */
      delta = dis_op2_G_E ( sorb, pfx_lock, False,
                            Iop_Sub8, False, sz, delta, "cmp" );
      break;
13884
13885 /* ------------------------ POP ------------------------ */
13886
   case 0x58: /* POP eAX */
   case 0x59: /* POP eCX */
   case 0x5A: /* POP eDX */
   case 0x5B: /* POP eBX */
   case 0x5D: /* POP eBP */
   case 0x5E: /* POP eSI */
   case 0x5F: /* POP eDI */
   case 0x5C: /* POP eSP */
      /* Register index is encoded in the opcode (opc - 0x58).  The
         value is read before ESP is bumped, so "popl %esp" yields
         the popped value, as required. */
      vassert(sz == 2 || sz == 4);
      t1 = newTemp(szToITy(sz)); t2 = newTemp(Ity_I32);
      assign(t2, getIReg(4, R_ESP));
      assign(t1, loadLE(szToITy(sz),mkexpr(t2)));
      putIReg(4, R_ESP, binop(Iop_Add32, mkexpr(t2), mkU32(sz)));
      putIReg(sz, opc-0x58, mkexpr(t1));
      DIP("pop%c %s\n", nameISize(sz), nameIReg(sz,opc-0x58));
      break;

   case 0x9D: /* POPF */
      /* Pop a 16/32-bit value and install it as the new EFLAGS. */
      vassert(sz == 2 || sz == 4);
      t1 = newTemp(Ity_I32); t2 = newTemp(Ity_I32);
      assign(t2, getIReg(4, R_ESP));
      assign(t1, widenUto32(loadLE(szToITy(sz),mkexpr(t2))));
      putIReg(4, R_ESP, binop(Iop_Add32, mkexpr(t2), mkU32(sz)));

      /* Generate IR to set %EFLAGS{O,S,Z,A,C,P,D,ID,AC} from the
         value in t1. */
      set_EFLAGS_from_value( t1, True/*emit_AC_emwarn*/,
                             ((Addr32)guest_EIP_bbstart)+delta );

      DIP("popf%c\n", nameISize(sz));
      break;
13918
   case 0x61: /* POPA */
      /* This is almost certainly wrong for sz==2.  So ... */
      if (sz != 4) goto decode_failure;

      /* t5 is the old %ESP value. */
      t5 = newTemp(Ity_I32);
      assign( t5, getIReg(4, R_ESP) );

      /* Reload all the registers, except %esp.  PUSHA stored them in
         the order EAX..EDI, so EDI is at the lowest address. */
      putIReg(4,R_EAX, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t5),mkU32(28)) ));
      putIReg(4,R_ECX, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t5),mkU32(24)) ));
      putIReg(4,R_EDX, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t5),mkU32(20)) ));
      putIReg(4,R_EBX, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t5),mkU32(16)) ));
      /* ignore saved %ESP */
      putIReg(4,R_EBP, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t5),mkU32( 8)) ));
      putIReg(4,R_ESI, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t5),mkU32( 4)) ));
      putIReg(4,R_EDI, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t5),mkU32( 0)) ));

      /* and move %ESP back up */
      putIReg( 4, R_ESP, binop(Iop_Add32, mkexpr(t5), mkU32(8*4)) );

      DIP("popa%c\n", nameISize(sz));
      break;
13942
   case 0x8F: /* POPL/POPW m32 */
     { Int len;
       UChar rm = getIByte(delta);

       /* make sure this instruction is correct POP */
       if (epartIsReg(rm) || gregOfRM(rm) != 0)
          goto decode_failure;
       /* and has correct size */
       if (sz != 4 && sz != 2)
          goto decode_failure;
       ty = szToITy(sz);

       t1 = newTemp(Ity_I32); /* stack address */
       t3 = newTemp(ty); /* data */
       /* set t1 to ESP: t1 = ESP */
       assign( t1, getIReg(4, R_ESP) );
       /* load M[ESP] to virtual register t3: t3 = M[t1] */
       assign( t3, loadLE(ty, mkexpr(t1)) );

       /* increase ESP; must be done before the STORE.  Intel manual says:
            If the ESP register is used as a base register for addressing
            a destination operand in memory, the POP instruction computes
            the effective address of the operand after it increments the
            ESP register.
       */
       putIReg(4, R_ESP, binop(Iop_Add32, mkexpr(t1), mkU32(sz)) );

       /* resolve MODR/M */
       addr = disAMode ( &len, sorb, delta, dis_buf);
       storeLE( mkexpr(addr), mkexpr(t3) );

       DIP("pop%c %s\n", sz==2 ? 'w' : 'l', dis_buf);

       delta += len;
       break;
     }

   /* Segment-register pops; all work is done by dis_pop_segreg. */
   case 0x1F: /* POP %DS */
      dis_pop_segreg( R_DS, sz ); break;
   case 0x07: /* POP %ES */
      dis_pop_segreg( R_ES, sz ); break;
   case 0x17: /* POP %SS */
      dis_pop_segreg( R_SS, sz ); break;
13986
13987 /* ------------------------ PUSH ----------------------- */
13988
   case 0x50: /* PUSH eAX */
   case 0x51: /* PUSH eCX */
   case 0x52: /* PUSH eDX */
   case 0x53: /* PUSH eBX */
   case 0x55: /* PUSH eBP */
   case 0x56: /* PUSH eSI */
   case 0x57: /* PUSH eDI */
   case 0x54: /* PUSH eSP */
      /* This is the Right Way, in that the value to be pushed is
         established before %esp is changed, so that pushl %esp
         correctly pushes the old value. */
      vassert(sz == 2 || sz == 4);
      ty = sz==2 ? Ity_I16 : Ity_I32;
      t1 = newTemp(ty); t2 = newTemp(Ity_I32);
      assign(t1, getIReg(sz, opc-0x50));
      assign(t2, binop(Iop_Sub32, getIReg(4, R_ESP), mkU32(sz)));
      putIReg(4, R_ESP, mkexpr(t2) );
      storeLE(mkexpr(t2),mkexpr(t1));
      DIP("push%c %s\n", nameISize(sz), nameIReg(sz,opc-0x50));
      break;


   case 0x68: /* PUSH Iv */
      d32 = getUDisp(sz,delta); delta += sz;
      goto do_push_I;
   case 0x6A: /* PUSH Ib, sign-extended to sz */
      d32 = getSDisp8(delta); delta += 1;
      goto do_push_I;
   do_push_I:
      /* Common tail: push the immediate now held in d32. */
      ty = szToITy(sz);
      t1 = newTemp(Ity_I32); t2 = newTemp(ty);
      assign( t1, binop(Iop_Sub32,getIReg(4,R_ESP),mkU32(sz)) );
      putIReg(4, R_ESP, mkexpr(t1) );
      /* stop mkU16 asserting if d32 is a negative 16-bit number
         (bug #132813) */
      if (ty == Ity_I16)
         d32 &= 0xFFFF;
      storeLE( mkexpr(t1), mkU(ty,d32) );
      DIP("push%c $0x%x\n", nameISize(sz), d32);
      break;
14029
   case 0x9C: /* PUSHF */ {
      /* Push the EFLAGS image: compute OSZACP from the thunk, then
         OR in the fixed bits and the lazily-maintained D/ID/AC flags
         from their separate guest-state fields. */
      vassert(sz == 2 || sz == 4);

      t1 = newTemp(Ity_I32);
      assign( t1, binop(Iop_Sub32,getIReg(4,R_ESP),mkU32(sz)) );
      putIReg(4, R_ESP, mkexpr(t1) );

      /* Calculate OSZACP, and patch in fixed fields as per
         Intel docs.
         - bit 1 is always 1
         - bit 9 is Interrupt Enable (should always be 1 in user mode?)
      */
      t2 = newTemp(Ity_I32);
      assign( t2, binop(Iop_Or32,
                        mk_x86g_calculate_eflags_all(),
                        mkU32( (1<<1)|(1<<9) ) ));

      /* Patch in the D flag.  This can simply be a copy of bit 10 of
         baseBlock[OFFB_DFLAG]. */
      /* DFLAG is stored as 1 or 0xFFFFFFFF, so bit 10 is set exactly
         when the direction flag is set. */
      t3 = newTemp(Ity_I32);
      assign( t3, binop(Iop_Or32,
                        mkexpr(t2),
                        binop(Iop_And32,
                              IRExpr_Get(OFFB_DFLAG,Ity_I32),
                              mkU32(1<<10)))
            );

      /* And patch in the ID flag. */
      /* IDFLAG is stored as 0/1; shift it up to EFLAGS bit 21. */
      t4 = newTemp(Ity_I32);
      assign( t4, binop(Iop_Or32,
                        mkexpr(t3),
                        binop(Iop_And32,
                              binop(Iop_Shl32, IRExpr_Get(OFFB_IDFLAG,Ity_I32),
                                               mkU8(21)),
                              mkU32(1<<21)))
            );

      /* And patch in the AC flag. */
      /* ACFLAG is stored as 0/1; shift it up to EFLAGS bit 18. */
      t5 = newTemp(Ity_I32);
      assign( t5, binop(Iop_Or32,
                        mkexpr(t4),
                        binop(Iop_And32,
                              binop(Iop_Shl32, IRExpr_Get(OFFB_ACFLAG,Ity_I32),
                                               mkU8(18)),
                              mkU32(1<<18)))
            );

      /* if sz==2, the stored value needs to be narrowed. */
      if (sz == 2)
        storeLE( mkexpr(t1), unop(Iop_32to16,mkexpr(t5)) );
      else
        storeLE( mkexpr(t1), mkexpr(t5) );

      DIP("pushf%c\n", nameISize(sz));
      break;
   }
14086
   case 0x60: /* PUSHA */
      /* Push all 8 GPRs in the order EAX..EDI; the ESP slot gets the
         pre-push ESP value. */
      /* This is almost certainly wrong for sz==2.  So ... */
      if (sz != 4) goto decode_failure;

      /* This is the Right Way, in that the value to be pushed is
         established before %esp is changed, so that pusha
         correctly pushes the old %esp value.  New value of %esp is
         pushed at start. */
      /* t0 is the %ESP value we're going to push. */
      t0 = newTemp(Ity_I32);
      assign( t0, getIReg(4, R_ESP) );

      /* t5 will be the new %ESP value. */
      t5 = newTemp(Ity_I32);
      assign( t5, binop(Iop_Sub32, mkexpr(t0), mkU32(8*4)) );

      /* Update guest state before prodding memory. */
      putIReg(4, R_ESP, mkexpr(t5));

      /* Dump all the registers. */
      storeLE( binop(Iop_Add32,mkexpr(t5),mkU32(28)), getIReg(4,R_EAX) );
      storeLE( binop(Iop_Add32,mkexpr(t5),mkU32(24)), getIReg(4,R_ECX) );
      storeLE( binop(Iop_Add32,mkexpr(t5),mkU32(20)), getIReg(4,R_EDX) );
      storeLE( binop(Iop_Add32,mkexpr(t5),mkU32(16)), getIReg(4,R_EBX) );
      storeLE( binop(Iop_Add32,mkexpr(t5),mkU32(12)), mkexpr(t0) /*esp*/);
      storeLE( binop(Iop_Add32,mkexpr(t5),mkU32( 8)), getIReg(4,R_EBP) );
      storeLE( binop(Iop_Add32,mkexpr(t5),mkU32( 4)), getIReg(4,R_ESI) );
      storeLE( binop(Iop_Add32,mkexpr(t5),mkU32( 0)), getIReg(4,R_EDI) );

      DIP("pusha%c\n", nameISize(sz));
      break;
14118
   /* PUSH of a segment register; IR generation delegated to
      dis_push_segreg. */
   case 0x0E: /* PUSH %CS */
      dis_push_segreg( R_CS, sz ); break;
   case 0x1E: /* PUSH %DS */
      dis_push_segreg( R_DS, sz ); break;
   case 0x06: /* PUSH %ES */
      dis_push_segreg( R_ES, sz ); break;
   case 0x16: /* PUSH %SS */
      dis_push_segreg( R_SS, sz ); break;
14127
   /* ------------------------ SCAS et al ----------------- */

   /* Un-REPed string instructions.  The even opcode of each pair is
      the byte form (size 1); the odd opcode uses the current operand
      size sz.  Segment-override prefixes are rejected here because
      dis_string_op asserts on them. */
   case 0xA4: /* MOVS, no REP prefix */
   case 0xA5:
      if (sorb != 0)
         goto decode_failure; /* else dis_string_op asserts */
      dis_string_op( dis_MOVS, ( opc == 0xA4 ? 1 : sz ), "movs", sorb );
      break;

   case 0xA6: /* CMPSb, no REP prefix */
   case 0xA7:
      if (sorb != 0)
         goto decode_failure; /* else dis_string_op asserts */
      dis_string_op( dis_CMPS, ( opc == 0xA6 ? 1 : sz ), "cmps", sorb );
      break;

   case 0xAA: /* STOS, no REP prefix */
   case 0xAB:
      if (sorb != 0)
         goto decode_failure; /* else dis_string_op asserts */
      dis_string_op( dis_STOS, ( opc == 0xAA ? 1 : sz ), "stos", sorb );
      break;

   case 0xAC: /* LODS, no REP prefix */
   case 0xAD:
      if (sorb != 0)
         goto decode_failure; /* else dis_string_op asserts */
      dis_string_op( dis_LODS, ( opc == 0xAC ? 1 : sz ), "lods", sorb );
      break;

   case 0xAE: /* SCAS, no REP prefix */
   case 0xAF:
      if (sorb != 0)
         goto decode_failure; /* else dis_string_op asserts */
      dis_string_op( dis_SCAS, ( opc == 0xAE ? 1 : sz ), "scas", sorb );
      break;
14164
14165
   /* The D flag lives in its own guest-state slot, encoded as
      +1 (forward) or -1 (0xFFFFFFFF, backward) so the string-op
      IR can use it directly as a stride multiplier. */
   case 0xFC: /* CLD */
      stmt( IRStmt_Put( OFFB_DFLAG, mkU32(1)) );
      DIP("cld\n");
      break;

   case 0xFD: /* STD */
      stmt( IRStmt_Put( OFFB_DFLAG, mkU32(0xFFFFFFFF)) );
      DIP("std\n");
      break;

   case 0xF8: /* CLC */
   case 0xF9: /* STC */
   case 0xF5: /* CMC */
      /* Force the flags thunk to concrete EFLAGS, adjust the C bit,
         then re-install with OP_COPY so later reads see the result. */
      t0 = newTemp(Ity_I32);
      t1 = newTemp(Ity_I32);
      assign( t0, mk_x86g_calculate_eflags_all() );
      switch (opc) {
         case 0xF8:
            assign( t1, binop(Iop_And32, mkexpr(t0),
                              mkU32(~X86G_CC_MASK_C)));
            DIP("clc\n");
            break;
         case 0xF9:
            assign( t1, binop(Iop_Or32, mkexpr(t0),
                              mkU32(X86G_CC_MASK_C)));
            DIP("stc\n");
            break;
         case 0xF5:
            assign( t1, binop(Iop_Xor32, mkexpr(t0),
                              mkU32(X86G_CC_MASK_C)));
            DIP("cmc\n");
            break;
         default:
            vpanic("disInstr(x86)(clc/stc/cmc)");
      }
      stmt( IRStmt_Put( OFFB_CC_OP,   mkU32(X86G_CC_OP_COPY) ));
      stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) ));
      stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(t1) ));
      /* Set NDEP even though it isn't used.  This makes redundant-PUT
         elimination of previous stores to this field work better. */
      stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
      break;
14208
   case 0xD6: /* SALC */
      /* Undocumented insn: AL = CF ? 0xFF : 0x00.  Materialise the
         carry, then arithmetically smear bit 0 across all 32 bits
         (shl 31 then sar 31) and narrow to AL. */
      t0 = newTemp(Ity_I32);
      t1 = newTemp(Ity_I32);
      assign( t0,  binop(Iop_And32,
                         mk_x86g_calculate_eflags_c(),
                         mkU32(1)) );
      assign( t1, binop(Iop_Sar32,
                        binop(Iop_Shl32, mkexpr(t0), mkU8(31)),
                        mkU8(31)) );
      putIReg(1, R_EAX, unop(Iop_32to8, mkexpr(t1)) );
      DIP("salc\n");
      break;
14221
   /* REPNE prefix insn */
   /* Decode the byte following 0xF2 and dispatch the repeated string
      op with condition X86CondNZ (loop while ECX!=0 and ZF==0).
      eip_orig lets dis_REP_op re-enter this same instruction for the
      next iteration. */
   case 0xF2: {
      Addr32 eip_orig = guest_EIP_bbstart + delta_start;
      if (sorb != 0) goto decode_failure;
      abyte = getIByte(delta); delta++;

      /* An operand-size prefix after the REPNE selects 16-bit ops. */
      if (abyte == 0x66) { sz = 2; abyte = getIByte(delta); delta++; }

      switch (abyte) {
      /* According to the Intel manual, "repne movs" should never occur, but
       * in practice it has happened, so allow for it here... */
      case 0xA4: sz = 1;   /* REPNE MOVS<sz> */
                 /* fallthrough: byte form just forces sz = 1 */
      case 0xA5:
         dis_REP_op ( &dres, X86CondNZ, dis_MOVS, sz, eip_orig,
                             guest_EIP_bbstart+delta, "repne movs" );
         break;

      case 0xA6: sz = 1;   /* REPNE CMP<sz> */
                 /* fallthrough */
      case 0xA7:
         dis_REP_op ( &dres, X86CondNZ, dis_CMPS, sz, eip_orig,
                             guest_EIP_bbstart+delta, "repne cmps" );
         break;

      case 0xAA: sz = 1;   /* REPNE STOS<sz> */
                 /* fallthrough */
      case 0xAB:
         dis_REP_op ( &dres, X86CondNZ, dis_STOS, sz, eip_orig,
                             guest_EIP_bbstart+delta, "repne stos" );
         break;

      case 0xAE: sz = 1;   /* REPNE SCAS<sz> */
                 /* fallthrough */
      case 0xAF:
         dis_REP_op ( &dres, X86CondNZ, dis_SCAS, sz, eip_orig,
                             guest_EIP_bbstart+delta, "repne scas" );
         break;

      default:
         goto decode_failure;
      }
      break;
   }
14262
   /* REP/REPE prefix insn (for SCAS and CMPS, 0xF3 means REPE,
      for the rest, it means REP) */
   /* As for 0xF2 above, but with condition X86CondZ for CMPS/SCAS
      and X86CondAlways for the rest.  Also accepts F3-prefixed
      0F BC/BD (TZCNT/LZCNT on older CPUs decode as BSF/BSR),
      PAUSE (F3 90) and "rep ret" (F3 C3). */
   case 0xF3: {
      Addr32 eip_orig = guest_EIP_bbstart + delta_start;
      abyte = getIByte(delta); delta++;

      if (abyte == 0x66) { sz = 2; abyte = getIByte(delta); delta++; }

      /* Segment overrides are only tolerated for the 0F escape forms. */
      if (sorb != 0 && abyte != 0x0F) goto decode_failure;

      switch (abyte) {
      case 0x0F:
         switch (getIByte(delta)) {
         /* On older CPUs, TZCNT behaves the same as BSF. */
         case 0xBC: /* REP BSF Gv,Ev */
            delta = dis_bs_E_G ( sorb, sz, delta + 1, True );
            break;
         /* On older CPUs, LZCNT behaves the same as BSR. */
         case 0xBD: /* REP BSR Gv,Ev */
            delta = dis_bs_E_G ( sorb, sz, delta + 1, False );
            break;
         default:
            goto decode_failure;
         }
         break;

      case 0xA4: sz = 1;   /* REP MOVS<sz> */
                 /* fallthrough: byte form just forces sz = 1 */
      case 0xA5:
         dis_REP_op ( &dres, X86CondAlways, dis_MOVS, sz, eip_orig,
                             guest_EIP_bbstart+delta, "rep movs" );
         break;

      case 0xA6: sz = 1;   /* REPE CMP<sz> */
                 /* fallthrough */
      case 0xA7:
         dis_REP_op ( &dres, X86CondZ, dis_CMPS, sz, eip_orig,
                             guest_EIP_bbstart+delta, "repe cmps" );
         break;

      case 0xAA: sz = 1;   /* REP STOS<sz> */
                 /* fallthrough */
      case 0xAB:
         dis_REP_op ( &dres, X86CondAlways, dis_STOS, sz, eip_orig,
                             guest_EIP_bbstart+delta, "rep stos" );
         break;

      case 0xAC: sz = 1;   /* REP LODS<sz> */
                 /* fallthrough */
      case 0xAD:
         dis_REP_op ( &dres, X86CondAlways, dis_LODS, sz, eip_orig,
                             guest_EIP_bbstart+delta, "rep lods" );
         break;

      case 0xAE: sz = 1;   /* REPE SCAS<sz> */
                 /* fallthrough */
      case 0xAF:
         dis_REP_op ( &dres, X86CondZ, dis_SCAS, sz, eip_orig,
                             guest_EIP_bbstart+delta, "repe scas" );
         break;

      case 0x90: /* REP NOP (PAUSE) */
         /* a hint to the P4 re spin-wait loop */
         DIP("rep nop (P4 pause)\n");
         /* "observe" the hint.  The Vex client needs to be careful not
            to cause very long delays as a result, though. */
         jmp_lit(&dres, Ijk_Yield, ((Addr32)guest_EIP_bbstart)+delta);
         vassert(dres.whatNext == Dis_StopHere);
         break;

      case 0xC3: /* REP RET -- same as normal ret? */
         dis_ret(&dres, 0);
         DIP("rep ret\n");
         break;

      default:
         goto decode_failure;
      }
      break;
   }
14338
   /* ------------------------ XCHG ----------------------- */

   /* XCHG reg,mem automatically asserts LOCK# even without a LOCK
      prefix; hence it must be translated with an IRCAS (at least, the
      memory variant). */
   case 0x86: /* XCHG Gb,Eb */
      sz = 1;
      /* Fall through ... */
   case 0x87: /* XCHG Gv,Ev */
      modrm = getIByte(delta);
      ty = szToITy(sz);
      t1 = newTemp(ty); t2 = newTemp(ty);
      if (epartIsReg(modrm)) {
         /* reg-reg form: plain swap, no atomicity concerns. */
         assign(t1, getIReg(sz, eregOfRM(modrm)));
         assign(t2, getIReg(sz, gregOfRM(modrm)));
         putIReg(sz, gregOfRM(modrm), mkexpr(t1));
         putIReg(sz, eregOfRM(modrm), mkexpr(t2));
         delta++;
         DIP("xchg%c %s, %s\n",
             nameISize(sz), nameIReg(sz,gregOfRM(modrm)),
                            nameIReg(sz,eregOfRM(modrm)));
      } else {
         /* reg-mem form: use a CAS so the implicit LOCK# semantics
            are represented; tell the caller to expect it. */
         *expect_CAS = True;
         addr = disAMode ( &alen, sorb, delta, dis_buf );
         assign( t1, loadLE(ty,mkexpr(addr)) );
         assign( t2, getIReg(sz,gregOfRM(modrm)) );
         casLE( mkexpr(addr),
                mkexpr(t1), mkexpr(t2), guest_EIP_curr_instr );
         putIReg( sz, gregOfRM(modrm), mkexpr(t1) );
         delta += alen;
         DIP("xchg%c %s, %s\n", nameISize(sz),
             nameIReg(sz,gregOfRM(modrm)), dis_buf);
      }
      break;
14373
   case 0x90: /* XCHG eAX,eAX */
      /* The canonical x86 NOP encoding: generates no IR at all. */
      DIP("nop\n");
      break;
   case 0x91: /* XCHG eAX,eCX */
   case 0x92: /* XCHG eAX,eDX */
   case 0x93: /* XCHG eAX,eBX */
   case 0x94: /* XCHG eAX,eSP */
   case 0x95: /* XCHG eAX,eBP */
   case 0x96: /* XCHG eAX,eSI */
   case 0x97: /* XCHG eAX,eDI */
      /* Register number is opc - 0x90. */
      codegen_xchg_eAX_Reg ( sz, opc - 0x90 );
      break;
14386
   /* ------------------------ XLAT ----------------------- */

   case 0xD7: /* XLAT */
      /* AL = M[seg:EBX + zero-extended AL]. */
      if (sz != 4) goto decode_failure; /* sz == 2 is also allowed (0x66) */
      putIReg(
         1,
         R_EAX/*AL*/,
         loadLE(Ity_I8,
                handleSegOverride(
                   sorb,
                   binop(Iop_Add32,
                         getIReg(4, R_EBX),
                         unop(Iop_8Uto32, getIReg(1, R_EAX/*AL*/))))));

      DIP("xlat%c [ebx]\n", nameISize(sz));
      break;
14403
   /* ------------------------ IN / OUT ----------------------- */

   /* All four IN forms set up t1 = 32-bit port number and sz = width,
      then share the do_IN tail, which calls out to a dirty helper to
      do the actual port read. */
   case 0xE4: /* IN imm8, AL */
      sz = 1;
      t1 = newTemp(Ity_I32);
      abyte = getIByte(delta); delta++;
      assign(t1, mkU32( abyte & 0xFF ));
      DIP("in%c $%d,%s\n", nameISize(sz), (Int)abyte, nameIReg(sz,R_EAX));
      goto do_IN;
   case 0xE5: /* IN imm8, eAX */
      vassert(sz == 2 || sz == 4);
      t1 = newTemp(Ity_I32);
      abyte = getIByte(delta); delta++;
      assign(t1, mkU32( abyte & 0xFF ));
      DIP("in%c $%d,%s\n", nameISize(sz), (Int)abyte, nameIReg(sz,R_EAX));
      goto do_IN;
   case 0xEC: /* IN %DX, AL */
      sz = 1;
      t1 = newTemp(Ity_I32);
      assign(t1, unop(Iop_16Uto32, getIReg(2, R_EDX)));
      DIP("in%c %s,%s\n", nameISize(sz), nameIReg(2,R_EDX),
                                         nameIReg(sz,R_EAX));
      goto do_IN;
   case 0xED: /* IN %DX, eAX */
      vassert(sz == 2 || sz == 4);
      t1 = newTemp(Ity_I32);
      assign(t1, unop(Iop_16Uto32, getIReg(2, R_EDX)));
      DIP("in%c %s,%s\n", nameISize(sz), nameIReg(2,R_EDX),
                                         nameIReg(sz,R_EAX));
      goto do_IN;
   do_IN: {
      /* At this point, sz indicates the width, and t1 is a 32-bit
         value giving port number. */
      IRDirty* d;
      vassert(sz == 1 || sz == 2 || sz == 4);
      ty = szToITy(sz);
      t2 = newTemp(Ity_I32);
      d = unsafeIRDirty_1_N(
             t2,
             0/*regparms*/,
             "x86g_dirtyhelper_IN",
             &x86g_dirtyhelper_IN,
             mkIRExprVec_2( mkexpr(t1), mkU32(sz) )
          );
      /* do the call, dumping the result in t2. */
      stmt( IRStmt_Dirty(d) );
      /* Helper returns a 32-bit value; narrow it to the operand size
         before writing AL/AX/EAX. */
      putIReg(sz, R_EAX, narrowTo( ty, mkexpr(t2) ) );
      break;
   }
14453
   /* All four OUT forms set up t1 = 32-bit port number and sz = width,
      then share the do_OUT tail, which passes AL/AX/EAX (widened to
      32 bits) to a dirty helper for the actual port write. */
   case 0xE6: /* OUT AL, imm8 */
      sz = 1;
      t1 = newTemp(Ity_I32);
      abyte = getIByte(delta); delta++;
      assign( t1, mkU32( abyte & 0xFF ) );
      DIP("out%c %s,$%d\n", nameISize(sz), nameIReg(sz,R_EAX), (Int)abyte);
      goto do_OUT;
   case 0xE7: /* OUT eAX, imm8 */
      vassert(sz == 2 || sz == 4);
      t1 = newTemp(Ity_I32);
      abyte = getIByte(delta); delta++;
      assign( t1, mkU32( abyte & 0xFF ) );
      DIP("out%c %s,$%d\n", nameISize(sz), nameIReg(sz,R_EAX), (Int)abyte);
      goto do_OUT;
   case 0xEE: /* OUT AL, %DX */
      sz = 1;
      t1 = newTemp(Ity_I32);
      assign( t1, unop(Iop_16Uto32, getIReg(2, R_EDX)) );
      DIP("out%c %s,%s\n", nameISize(sz), nameIReg(sz,R_EAX),
                                          nameIReg(2,R_EDX));
      goto do_OUT;
   case 0xEF: /* OUT eAX, %DX */
      vassert(sz == 2 || sz == 4);
      t1 = newTemp(Ity_I32);
      assign( t1, unop(Iop_16Uto32, getIReg(2, R_EDX)) );
      DIP("out%c %s,%s\n", nameISize(sz), nameIReg(sz,R_EAX),
                                          nameIReg(2,R_EDX));
      goto do_OUT;
   do_OUT: {
      /* At this point, sz indicates the width, and t1 is a 32-bit
         value giving port number. */
      IRDirty* d;
      vassert(sz == 1 || sz == 2 || sz == 4);
      ty = szToITy(sz);
      d = unsafeIRDirty_0_N(
             0/*regparms*/,
             "x86g_dirtyhelper_OUT",
             &x86g_dirtyhelper_OUT,
             mkIRExprVec_3( mkexpr(t1),
                            widenUto32( getIReg(sz, R_EAX) ),
                            mkU32(sz) )
          );
      stmt( IRStmt_Dirty(d) );
      break;
   }
14499
   /* ------------------------ (Grp1 extensions) ---------- */

   /* Grp1 = ADD/OR/ADC/SBB/AND/SUB/XOR/CMP with an immediate; the
      reg field of the modrm byte selects the operation.  Each case
      just fetches the immediate (d32, of encoded size d_sz) and
      hands off to dis_Grp1. */
   case 0x82: /* Grp1 Ib,Eb too.  Apparently this is the same as
                 case 0x80, but only in 32-bit mode. */
      /* fallthru */
   case 0x80: /* Grp1 Ib,Eb */
      modrm = getIByte(delta);
      am_sz = lengthAMode(delta);
      sz    = 1;
      d_sz  = 1;
      d32   = getUChar(delta + am_sz);
      delta = dis_Grp1 ( sorb, pfx_lock, delta, modrm, am_sz, d_sz, sz, d32 );
      break;

   case 0x81: /* Grp1 Iv,Ev */
      modrm = getIByte(delta);
      am_sz = lengthAMode(delta);
      d_sz  = sz;
      d32   = getUDisp(d_sz, delta + am_sz);
      delta = dis_Grp1 ( sorb, pfx_lock, delta, modrm, am_sz, d_sz, sz, d32 );
      break;

   case 0x83: /* Grp1 Ib,Ev */
      /* 8-bit immediate, sign-extended to the operand size. */
      modrm = getIByte(delta);
      am_sz = lengthAMode(delta);
      d_sz  = 1;
      d32   = getSDisp8(delta + am_sz);
      delta = dis_Grp1 ( sorb, pfx_lock, delta, modrm, am_sz, d_sz, sz, d32 );
      break;
14529
   /* ------------------------ (Grp2 extensions) ---------- */

   /* Grp2 = shifts/rotates (ROL/ROR/RCL/RCR/SHL/SHR/SAL/SAR); the
      shift amount is an imm8 (C0/C1), the constant 1 (D0/D1), or
      %cl (D2/D3).  dis_Grp2 reports undecodable sub-cases through
      decode_OK. */
   case 0xC0: { /* Grp2 Ib,Eb */
      Bool decode_OK = True;
      modrm = getIByte(delta);
      am_sz = lengthAMode(delta);
      d_sz  = 1;
      d32   = getUChar(delta + am_sz);
      sz    = 1;
      delta = dis_Grp2 ( sorb, delta, modrm, am_sz, d_sz, sz,
                         mkU8(d32 & 0xFF), NULL, &decode_OK );
      if (!decode_OK)
         goto decode_failure;
      break;
   }
   case 0xC1: { /* Grp2 Ib,Ev */
      Bool decode_OK = True;
      modrm = getIByte(delta);
      am_sz = lengthAMode(delta);
      d_sz  = 1;
      d32   = getUChar(delta + am_sz);
      delta = dis_Grp2 ( sorb, delta, modrm, am_sz, d_sz, sz,
                         mkU8(d32 & 0xFF), NULL, &decode_OK );
      if (!decode_OK)
         goto decode_failure;
      break;
   }
   case 0xD0: { /* Grp2 1,Eb */
      Bool decode_OK = True;
      modrm = getIByte(delta);
      am_sz = lengthAMode(delta);
      d_sz  = 0;
      d32   = 1;
      sz    = 1;
      delta = dis_Grp2 ( sorb, delta, modrm, am_sz, d_sz, sz,
                         mkU8(d32), NULL, &decode_OK );
      if (!decode_OK)
         goto decode_failure;
      break;
   }
   case 0xD1: { /* Grp2 1,Ev */
      Bool decode_OK = True;
      modrm = getUChar(delta);
      am_sz = lengthAMode(delta);
      d_sz  = 0;
      d32   = 1;
      delta = dis_Grp2 ( sorb, delta, modrm, am_sz, d_sz, sz,
                         mkU8(d32), NULL, &decode_OK );
      if (!decode_OK)
         goto decode_failure;
      break;
   }
   case 0xD2: { /* Grp2 CL,Eb */
      Bool decode_OK = True;
      modrm = getUChar(delta);
      am_sz = lengthAMode(delta);
      d_sz  = 0;
      sz    = 1;
      delta = dis_Grp2 ( sorb, delta, modrm, am_sz, d_sz, sz,
                         getIReg(1,R_ECX), "%cl", &decode_OK );
      if (!decode_OK)
         goto decode_failure;
      break;
   }
   case 0xD3: { /* Grp2 CL,Ev */
      Bool decode_OK = True;
      modrm = getIByte(delta);
      am_sz = lengthAMode(delta);
      d_sz  = 0;
      delta = dis_Grp2 ( sorb, delta, modrm, am_sz, d_sz, sz,
                         getIReg(1,R_ECX), "%cl", &decode_OK );
      if (!decode_OK)
         goto decode_failure;
      break;
   }
14605
   /* ------------------------ (Grp3 extensions) ---------- */

   /* Grp3 = TEST/NOT/NEG/MUL/IMUL/DIV/IDIV, selected by the modrm
      reg field; fully delegated to dis_Grp3. */
   case 0xF6: { /* Grp3 Eb */
      Bool decode_OK = True;
      delta = dis_Grp3 ( sorb, pfx_lock, 1, delta, &decode_OK );
      if (!decode_OK)
         goto decode_failure;
      break;
   }
   case 0xF7: { /* Grp3 Ev */
      Bool decode_OK = True;
      delta = dis_Grp3 ( sorb, pfx_lock, sz, delta, &decode_OK );
      if (!decode_OK)
         goto decode_failure;
      break;
   }

   /* ------------------------ (Grp4 extensions) ---------- */

   /* Grp4 = INC/DEC Eb. */
   case 0xFE: { /* Grp4 Eb */
      Bool decode_OK = True;
      delta = dis_Grp4 ( sorb, pfx_lock, delta, &decode_OK );
      if (!decode_OK)
         goto decode_failure;
      break;
   }

   /* ------------------------ (Grp5 extensions) ---------- */

   /* Grp5 = INC/DEC/CALL/JMP/PUSH Ev; may end the block, hence the
      extra &dres argument. */
   case 0xFF: { /* Grp5 Ev */
      Bool decode_OK = True;
      delta = dis_Grp5 ( sorb, pfx_lock, sz, delta, &dres, &decode_OK );
      if (!decode_OK)
         goto decode_failure;
      break;
   }
14642
14643 /* ------------------------ Escapes to 2-byte opcodes -- */
14644
14645 case 0x0F: {
14646 opc = getIByte(delta); delta++;
14647 switch (opc) {
14648
      /* =-=-=-=-=-=-=-=-=- Grp8 =-=-=-=-=-=-=-=-=-=-=-= */

      /* Grp8 = BT/BTS/BTR/BTC with an immediate bit number. */
      case 0xBA: { /* Grp8 Ib,Ev */
         Bool decode_OK = False;
         modrm = getUChar(delta);
         am_sz = lengthAMode(delta);
         d32   = getSDisp8(delta + am_sz);
         delta = dis_Grp8_Imm ( sorb, pfx_lock, delta, modrm,
                                am_sz, sz, d32, &decode_OK );
         if (!decode_OK)
            goto decode_failure;
         break;
      }
14662
      /* =-=-=-=-=-=-=-=-=- BSF/BSR -=-=-=-=-=-=-=-=-=-= */

      /* Bit scan forward/backward; final Bool selects direction. */
      case 0xBC: /* BSF Gv,Ev */
         delta = dis_bs_E_G ( sorb, sz, delta, True );
         break;
      case 0xBD: /* BSR Gv,Ev */
         delta = dis_bs_E_G ( sorb, sz, delta, False );
         break;
14671
      /* =-=-=-=-=-=-=-=-=- BSWAP -=-=-=-=-=-=-=-=-=-=-= */

      /* Register number is encoded in the low 3 bits (opc - 0xC8). */
      case 0xC8: /* BSWAP %eax */
      case 0xC9:
      case 0xCA:
      case 0xCB:
      case 0xCC:
      case 0xCD:
      case 0xCE:
      case 0xCF: /* BSWAP %edi */
         /* AFAICS from the Intel docs, this only exists at size 4. */
         if (sz != 4) goto decode_failure;

         t1 = newTemp(Ity_I32);
         assign( t1, getIReg(4, opc-0xC8) );
         t2 = math_BSWAP(t1, Ity_I32);

         putIReg(4, opc-0xC8, mkexpr(t2));
         DIP("bswapl %s\n", nameIReg(4, opc-0xC8));
         break;
14692
      /* =-=-=-=-=-=-=-=-=- BT/BTS/BTR/BTC =-=-=-=-=-=-= */

      /* Bit test with register bit number; the BtOp selects the
         side effect (none/reset/set/complement). */
      case 0xA3: /* BT Gv,Ev */
         delta = dis_bt_G_E ( vbi, sorb, pfx_lock, sz, delta, BtOpNone );
         break;
      case 0xB3: /* BTR Gv,Ev */
         delta = dis_bt_G_E ( vbi, sorb, pfx_lock, sz, delta, BtOpReset );
         break;
      case 0xAB: /* BTS Gv,Ev */
         delta = dis_bt_G_E ( vbi, sorb, pfx_lock, sz, delta, BtOpSet );
         break;
      case 0xBB: /* BTC Gv,Ev */
         delta = dis_bt_G_E ( vbi, sorb, pfx_lock, sz, delta, BtOpComp );
         break;
14707
      /* =-=-=-=-=-=-=-=-=- CMOV =-=-=-=-=-=-=-=-=-=-=-= */

      /* Condition code is the low nibble of the opcode (opc - 0x40),
         matching the X86Condcode enumeration. */
      case 0x40:
      case 0x41:
      case 0x42: /* CMOVBb/CMOVNAEb (cmov below) */
      case 0x43: /* CMOVNBb/CMOVAEb (cmov not below) */
      case 0x44: /* CMOVZb/CMOVEb (cmov zero) */
      case 0x45: /* CMOVNZb/CMOVNEb (cmov not zero) */
      case 0x46: /* CMOVBEb/CMOVNAb (cmov below or equal) */
      case 0x47: /* CMOVNBEb/CMOVAb (cmov not below or equal) */
      case 0x48: /* CMOVSb (cmov negative) */
      case 0x49: /* CMOVSb (cmov not negative) */
      case 0x4A: /* CMOVP (cmov parity even) */
      case 0x4B: /* CMOVNP (cmov parity odd) */
      case 0x4C: /* CMOVLb/CMOVNGEb (cmov less) */
      case 0x4D: /* CMOVGEb/CMOVNLb (cmov greater or equal) */
      case 0x4E: /* CMOVLEb/CMOVNGb (cmov less or equal) */
      case 0x4F: /* CMOVGb/CMOVNLEb (cmov greater) */
         delta = dis_cmov_E_G(sorb, sz, (X86Condcode)(opc - 0x40), delta);
         break;
14728
      /* =-=-=-=-=-=-=-=-=- CMPXCHG -=-=-=-=-=-=-=-=-=-= */

      /* Compare-and-exchange; first argument after the prefixes is
         the operand width. */
      case 0xB0: /* CMPXCHG Gb,Eb */
         delta = dis_cmpxchg_G_E ( sorb, pfx_lock, 1, delta );
         break;
      case 0xB1: /* CMPXCHG Gv,Ev */
         delta = dis_cmpxchg_G_E ( sorb, pfx_lock, sz, delta );
         break;
14737
      case 0xC7: { /* CMPXCHG8B Gv (0F C7 /1) */
         /* 64-bit compare-and-exchange: compare EDX:EAX against
            M[addr]; on match store ECX:EBX, else load M[addr] into
            EDX:EAX.  ZF reflects success; other flags unchanged. */
         IRTemp expdHi    = newTemp(Ity_I32);
         IRTemp expdLo    = newTemp(Ity_I32);
         IRTemp dataHi    = newTemp(Ity_I32);
         IRTemp dataLo    = newTemp(Ity_I32);
         IRTemp oldHi     = newTemp(Ity_I32);
         IRTemp oldLo     = newTemp(Ity_I32);
         IRTemp flags_old = newTemp(Ity_I32);
         IRTemp flags_new = newTemp(Ity_I32);
         IRTemp success   = newTemp(Ity_I1);

         /* Translate this using a DCAS, even if there is no LOCK
            prefix.  Life is too short to bother with generating two
            different translations for the with/without-LOCK-prefix
            cases. */
         *expect_CAS = True;

         /* Decode, and generate address. */
         /* Only the /1 (reg field == 1) encoding with a memory
            operand is CMPXCHG8B. */
         if (sz != 4) goto decode_failure;
         modrm = getIByte(delta);
         if (epartIsReg(modrm)) goto decode_failure;
         if (gregOfRM(modrm) != 1) goto decode_failure;
         addr = disAMode ( &alen, sorb, delta, dis_buf );
         delta += alen;

         /* Get the expected and new values. */
         assign( expdHi, getIReg(4,R_EDX) );
         assign( expdLo, getIReg(4,R_EAX) );
         assign( dataHi, getIReg(4,R_ECX) );
         assign( dataLo, getIReg(4,R_EBX) );

         /* Do the DCAS */
         stmt( IRStmt_CAS(
                  mkIRCAS( oldHi, oldLo,
                           Iend_LE, mkexpr(addr),
                           mkexpr(expdHi), mkexpr(expdLo),
                           mkexpr(dataHi), mkexpr(dataLo)
               )));

         /* success when oldHi:oldLo == expdHi:expdLo */
         /* XOR both halves and OR them together: zero iff both halves
            matched. */
         assign( success,
                 binop(Iop_CasCmpEQ32,
                       binop(Iop_Or32,
                             binop(Iop_Xor32, mkexpr(oldHi), mkexpr(expdHi)),
                             binop(Iop_Xor32, mkexpr(oldLo), mkexpr(expdLo))
                       ),
                       mkU32(0)
                 ));

         /* If the DCAS is successful, that is to say oldHi:oldLo ==
            expdHi:expdLo, then put expdHi:expdLo back in EDX:EAX,
            which is where they came from originally.  Both the actual
            contents of these two regs, and any shadow values, are
            unchanged.  If the DCAS fails then we're putting into
            EDX:EAX the value seen in memory. */
         putIReg(4, R_EDX,
                    IRExpr_ITE( mkexpr(success),
                                mkexpr(expdHi), mkexpr(oldHi)
                 ));
         putIReg(4, R_EAX,
                    IRExpr_ITE( mkexpr(success),
                                mkexpr(expdLo), mkexpr(oldLo)
                 ));

         /* Copy the success bit into the Z flag and leave the others
            unchanged */
         assign( flags_old, widenUto32(mk_x86g_calculate_eflags_all()));
         assign(
            flags_new,
            binop(Iop_Or32,
                  binop(Iop_And32, mkexpr(flags_old),
                                   mkU32(~X86G_CC_MASK_Z)),
                  binop(Iop_Shl32,
                        binop(Iop_And32,
                              unop(Iop_1Uto32, mkexpr(success)), mkU32(1)),
                        mkU8(X86G_CC_SHIFT_Z)) ));

         stmt( IRStmt_Put( OFFB_CC_OP,   mkU32(X86G_CC_OP_COPY) ));
         stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(flags_new) ));
         stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) ));
         /* Set NDEP even though it isn't used.  This makes
            redundant-PUT elimination of previous stores to this field
            work better. */
         stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));

         /* Sheesh.  Aren't you glad it was me and not you that had to
            write and validate all this grunge? */

         DIP("cmpxchg8b %s\n", dis_buf);
         break;
      }
14829
      /* =-=-=-=-=-=-=-=-=- CPUID -=-=-=-=-=-=-=-=-=-=-= */

      case 0xA2: { /* CPUID */
         /* Uses dirty helper:
               void dirtyhelper_CPUID_sse[012] ( VexGuestX86State* )
            declared to mod eax, wr ebx, ecx, edx
         */
         /* Pick the helper matching the host's advertised capability
            level, so the guest sees a CPUID consistent with what VEX
            can actually emulate.  Checks are ordered strongest first. */
         IRDirty* d     = NULL;
         void*    fAddr = NULL;
         const HChar* fName = NULL;
         if (archinfo->hwcaps & VEX_HWCAPS_X86_SSE2) {
            fName = "x86g_dirtyhelper_CPUID_sse2";
            fAddr = &x86g_dirtyhelper_CPUID_sse2;
         }
         else
         if (archinfo->hwcaps & VEX_HWCAPS_X86_SSE1) {
            fName = "x86g_dirtyhelper_CPUID_sse1";
            fAddr = &x86g_dirtyhelper_CPUID_sse1;
         }
         else
         if (archinfo->hwcaps & VEX_HWCAPS_X86_MMXEXT) {
            fName = "x86g_dirtyhelper_CPUID_mmxext";
            fAddr = &x86g_dirtyhelper_CPUID_mmxext;
         }
         else
         if (archinfo->hwcaps == 0/*no SSE*/) {
            fName = "x86g_dirtyhelper_CPUID_sse0";
            fAddr = &x86g_dirtyhelper_CPUID_sse0;
         } else
            vpanic("disInstr(x86)(cpuid)");

         vassert(fName); vassert(fAddr);
         d = unsafeIRDirty_0_N ( 0/*regparms*/,
                                 fName, fAddr, mkIRExprVec_1(IRExpr_BBPTR()) );
         /* declare guest state effects */
         d->nFxState = 4;
         vex_bzero(&d->fxState, sizeof(d->fxState));
         d->fxState[0].fx     = Ifx_Modify;
         d->fxState[0].offset = OFFB_EAX;
         d->fxState[0].size   = 4;
         d->fxState[1].fx     = Ifx_Write;
         d->fxState[1].offset = OFFB_EBX;
         d->fxState[1].size   = 4;
         d->fxState[2].fx     = Ifx_Modify;
         d->fxState[2].offset = OFFB_ECX;
         d->fxState[2].size   = 4;
         d->fxState[3].fx     = Ifx_Write;
         d->fxState[3].offset = OFFB_EDX;
         d->fxState[3].size   = 4;
         /* execute the dirty call, side-effecting guest state */
         stmt( IRStmt_Dirty(d) );
         /* CPUID is a serialising insn.  So, just in case someone is
            using it as a memory fence ... */
         stmt( IRStmt_MBE(Imbe_Fence) );
         DIP("cpuid\n");
         break;
      }
14887
14888 //-- if (!VG_(cpu_has_feature)(VG_X86_FEAT_CPUID))
14889 //-- goto decode_failure;
14890 //--
14891 //-- t1 = newTemp(cb);
14892 //-- t2 = newTemp(cb);
14893 //-- t3 = newTemp(cb);
14894 //-- t4 = newTemp(cb);
14895 //-- uInstr0(cb, CALLM_S, 0);
14896 //--
14897 //-- uInstr2(cb, GET, 4, ArchReg, R_EAX, TempReg, t1);
14898 //-- uInstr1(cb, PUSH, 4, TempReg, t1);
14899 //--
14900 //-- uInstr2(cb, MOV, 4, Literal, 0, TempReg, t2);
14901 //-- uLiteral(cb, 0);
14902 //-- uInstr1(cb, PUSH, 4, TempReg, t2);
14903 //--
14904 //-- uInstr2(cb, MOV, 4, Literal, 0, TempReg, t3);
14905 //-- uLiteral(cb, 0);
14906 //-- uInstr1(cb, PUSH, 4, TempReg, t3);
14907 //--
14908 //-- uInstr2(cb, MOV, 4, Literal, 0, TempReg, t4);
14909 //-- uLiteral(cb, 0);
14910 //-- uInstr1(cb, PUSH, 4, TempReg, t4);
14911 //--
14912 //-- uInstr1(cb, CALLM, 0, Lit16, VGOFF_(helper_CPUID));
14913 //-- uFlagsRWU(cb, FlagsEmpty, FlagsEmpty, FlagsEmpty);
14914 //--
14915 //-- uInstr1(cb, POP, 4, TempReg, t4);
14916 //-- uInstr2(cb, PUT, 4, TempReg, t4, ArchReg, R_EDX);
14917 //--
14918 //-- uInstr1(cb, POP, 4, TempReg, t3);
14919 //-- uInstr2(cb, PUT, 4, TempReg, t3, ArchReg, R_ECX);
14920 //--
14921 //-- uInstr1(cb, POP, 4, TempReg, t2);
14922 //-- uInstr2(cb, PUT, 4, TempReg, t2, ArchReg, R_EBX);
14923 //--
14924 //-- uInstr1(cb, POP, 4, TempReg, t1);
14925 //-- uInstr2(cb, PUT, 4, TempReg, t1, ArchReg, R_EAX);
14926 //--
14927 //-- uInstr0(cb, CALLM_E, 0);
14928 //-- DIP("cpuid\n");
14929 //-- break;
14930 //--
      /* =-=-=-=-=-=-=-=-=- MOVZX, MOVSX =-=-=-=-=-=-=-= */

      /* Widening moves; the final Bool of dis_movx_E_G selects
         sign-extension (True) vs zero-extension (False). */
      case 0xB6: /* MOVZXb Eb,Gv */
         if (sz != 2 && sz != 4)
            goto decode_failure;
         delta = dis_movx_E_G ( sorb, delta, 1, sz, False );
         break;

      case 0xB7: /* MOVZXw Ew,Gv */
         if (sz != 4)
            goto decode_failure;
         delta = dis_movx_E_G ( sorb, delta, 2, 4, False );
         break;

      case 0xBE: /* MOVSXb Eb,Gv */
         if (sz != 2 && sz != 4)
            goto decode_failure;
         delta = dis_movx_E_G ( sorb, delta, 1, sz, True );
         break;

      case 0xBF: /* MOVSXw Ew,Gv */
         if (sz != 4 && /* accept movsww, sigh, see #250799 */sz != 2)
            goto decode_failure;
         delta = dis_movx_E_G ( sorb, delta, 2, sz, True );
         break;
14956
14957 //-- /* =-=-=-=-=-=-=-=-=-=-= MOVNTI -=-=-=-=-=-=-=-=-= */
14958 //--
14959 //-- case 0xC3: /* MOVNTI Gv,Ev */
14960 //-- vg_assert(sz == 4);
14961 //-- modrm = getUChar(eip);
14962 //-- vg_assert(!epartIsReg(modrm));
14963 //-- t1 = newTemp(cb);
14964 //-- uInstr2(cb, GET, 4, ArchReg, gregOfRM(modrm), TempReg, t1);
14965 //-- pair = disAMode ( cb, sorb, eip, dis_buf );
14966 //-- t2 = LOW24(pair);
14967 //-- eip += HI8(pair);
14968 //-- uInstr2(cb, STORE, 4, TempReg, t1, TempReg, t2);
14969 //-- DIP("movnti %s,%s\n", nameIReg(4,gregOfRM(modrm)), dis_buf);
14970 //-- break;
14971
      /* =-=-=-=-=-=-=-=-=- MUL/IMUL =-=-=-=-=-=-=-=-=-= */

      /* Two-operand signed multiply, Gv = Gv * Ev. */
      case 0xAF: /* IMUL Ev, Gv */
         delta = dis_mul_E_G ( sorb, sz, delta );
         break;
14977
      /* =-=-=-=-=-=-=-=-=- NOPs =-=-=-=-=-=-=-=-=-=-=-= */

      /* 0F 1F /0: multi-byte NOP.  The modrm/amode must still be
         decoded (and its length consumed) even though no IR results. */
      case 0x1F:
         modrm = getUChar(delta);
         if (epartIsReg(modrm)) goto decode_failure;
         addr = disAMode ( &alen, sorb, delta, dis_buf );
         delta += alen;
         DIP("nop%c %s\n", nameISize(sz), dis_buf);
         break;
14987
      /* =-=-=-=-=-=-=-=-=- Jcond d32 -=-=-=-=-=-=-=-=-= */
      case 0x80: /* JO (jump overflow) */
      case 0x81: /* JNO (jump no overflow) */
      case 0x82: /* JBb/JNAEb (jump below) */
      case 0x83: /* JNBb/JAEb (jump not below) */
      case 0x84: /* JZb/JEb (jump zero) */
      case 0x85: /* JNZb/JNEb (jump not zero) */
      case 0x86: /* JBEb/JNAb (jump below or equal) */
      case 0x87: /* JNBEb/JAb (jump not below or equal) */
      case 0x88: /* JSb (jump negative) */
      case 0x89: /* JNSb (jump not negative) */
      case 0x8A: /* JP (jump parity even) */
      case 0x8B: /* JNP/JPO (jump parity odd) */
      case 0x8C: /* JLb/JNGEb (jump less) */
      case 0x8D: /* JGEb/JNLb (jump greater or equal) */
      case 0x8E: /* JLEb/JNGb (jump less or equal) */
      case 0x8F: /* JGb/JNLEb (jump greater) */
       /* (opc - 0x80) is the X86Condcode for this branch; XORing the
          low bit (1 ^ cc) yields the negated condition. */
       { Int jmpDelta;
         const HChar* comment = "";
         jmpDelta = (Int)getUDisp32(delta);
         d32 = (((Addr32)guest_EIP_bbstart)+delta+4) + jmpDelta;
         delta += 4;
         if (resteerCisOk
             && vex_control.guest_chase_cond
             && (Addr32)d32 != (Addr32)guest_EIP_bbstart
             && jmpDelta < 0
             && resteerOkFn( callback_opaque, (Addr64)(Addr32)d32) ) {
            /* Speculation: assume this backward branch is taken.  So
               we need to emit a side-exit to the insn following this
               one, on the negation of the condition, and continue at
               the branch target address (d32).  If we wind up back at
               the first instruction of the trace, just stop; it's
               better to let the IR loop unroller handle that case.*/
            stmt( IRStmt_Exit(
                     mk_x86g_calculate_condition((X86Condcode)
                                                 (1 ^ (opc - 0x80))),
                     Ijk_Boring,
                     IRConst_U32(guest_EIP_bbstart+delta),
                     OFFB_EIP ) );
            dres.whatNext = Dis_ResteerC;
            dres.continueAt = (Addr64)(Addr32)d32;
            comment = "(assumed taken)";
         }
         else
         if (resteerCisOk
             && vex_control.guest_chase_cond
             && (Addr32)d32 != (Addr32)guest_EIP_bbstart
             && jmpDelta >= 0
             && resteerOkFn( callback_opaque,
                             (Addr64)(Addr32)(guest_EIP_bbstart+delta)) ) {
            /* Speculation: assume this forward branch is not taken.
               So we need to emit a side-exit to d32 (the dest) and
               continue disassembling at the insn immediately
               following this one. */
            stmt( IRStmt_Exit(
                     mk_x86g_calculate_condition((X86Condcode)(opc - 0x80)),
                     Ijk_Boring,
                     IRConst_U32(d32),
                     OFFB_EIP ) );
            dres.whatNext = Dis_ResteerC;
            dres.continueAt = (Addr64)(Addr32)(guest_EIP_bbstart+delta);
            comment = "(assumed not taken)";
         }
         else {
            /* Conservative default translation - end the block at
               this point. */
            jcc_01( &dres, (X86Condcode)(opc - 0x80),
                    (Addr32)(guest_EIP_bbstart+delta), d32);
            vassert(dres.whatNext == Dis_StopHere);
         }
         DIP("j%s-32 0x%x %s\n", name_X86Condcode(opc - 0x80), d32, comment);
         break;
       }
15061
      /* =-=-=-=-=-=-=-=-=- RDTSC -=-=-=-=-=-=-=-=-=-=-= */
      case 0x31: { /* RDTSC */
         /* The timestamp counter can't be modelled in pure IR, so read
            it via a dirty helper and split the 64-bit result into
            EDX:EAX as the hardware does. */
         IRTemp   val  = newTemp(Ity_I64);
         IRExpr** args = mkIRExprVec_0();
         IRDirty* d    = unsafeIRDirty_1_N (
                            val,
                            0/*regparms*/,
                            "x86g_dirtyhelper_RDTSC",
                            &x86g_dirtyhelper_RDTSC,
                            args
                         );
         /* execute the dirty call, dumping the result in val. */
         stmt( IRStmt_Dirty(d) );
         putIReg(4, R_EDX, unop(Iop_64HIto32, mkexpr(val)));
         putIReg(4, R_EAX, unop(Iop_64to32, mkexpr(val)));
         DIP("rdtsc\n");
         break;
      }

      /* =-=-=-=-=-=-=-=-=- PUSH/POP Sreg =-=-=-=-=-=-=-=-=-= */

      /* Only FS and GS have two-byte (0F-prefixed) push/pop forms. */
      case 0xA1: /* POP %FS */
         dis_pop_segreg( R_FS, sz ); break;
      case 0xA9: /* POP %GS */
         dis_pop_segreg( R_GS, sz ); break;

      case 0xA0: /* PUSH %FS */
         dis_push_segreg( R_FS, sz ); break;
      case 0xA8: /* PUSH %GS */
         dis_push_segreg( R_GS, sz ); break;
15092
      /* =-=-=-=-=-=-=-=-=- SETcc Eb =-=-=-=-=-=-=-=-=-= */
      /* Write 0 or 1 to a byte register or memory byte, depending on
         the condition (opc - 0x90), which is an X86Condcode. */
      case 0x90: /* set-O (set if overflow) */
      case 0x91: /* set-NO (set if no overflow) */
      case 0x92: /* set-Bb/set-NAEb (set if below) */
      case 0x93: /* set-NBb/set-AEb (set if not below) */
      case 0x94: /* set-Zb/set-Eb (set if zero) */
      case 0x95: /* set-NZb/set-NEb (set if not zero) */
      case 0x96: /* set-BEb/set-NAb (set if below or equal) */
      case 0x97: /* set-NBEb/set-Ab (set if not below or equal) */
      case 0x98: /* set-Sb (set if negative) */
      case 0x99: /* set-NSb (set if not negative) */
      case 0x9A: /* set-P (set if parity even) */
      case 0x9B: /* set-NP (set if parity odd) */
      case 0x9C: /* set-Lb/set-NGEb (set if less) */
      case 0x9D: /* set-GEb/set-NLb (set if greater or equal) */
      case 0x9E: /* set-LEb/set-NGb (set if less or equal) */
      case 0x9F: /* set-Gb/set-NLEb (set if greater) */
         t1 = newTemp(Ity_I8);
         assign( t1, unop(Iop_1Uto8,mk_x86g_calculate_condition(opc-0x90)) );
         modrm = getIByte(delta);
         if (epartIsReg(modrm)) {
            delta++;
            putIReg(1, eregOfRM(modrm), mkexpr(t1));
            DIP("set%s %s\n", name_X86Condcode(opc-0x90),
                              nameIReg(1,eregOfRM(modrm)));
         } else {
            addr = disAMode ( &alen, sorb, delta, dis_buf );
            delta += alen;
            storeLE( mkexpr(addr), mkexpr(t1) );
            DIP("set%s %s\n", name_X86Condcode(opc-0x90), dis_buf);
         }
         break;
15125
      /* =-=-=-=-=-=-=-=-=- SHLD/SHRD -=-=-=-=-=-=-=-=-= */

      /* Double-precision shifts.  For the imm8 forms, d32 is reused
         (as a byte offset, not a displacement) to locate the imm8 that
         follows the addressing mode, so it can be peeked before
         dis_SHLRD_Gv_Ev consumes the whole instruction. */
      case 0xA4: /* SHLDv imm8,Gv,Ev */
         modrm = getIByte(delta);
         d32   = delta + lengthAMode(delta);
         vex_sprintf(dis_buf, "$%d", getIByte(d32));
         delta = dis_SHLRD_Gv_Ev (
                    sorb, delta, modrm, sz,
                    mkU8(getIByte(d32)), True, /* literal */
                    dis_buf, True );
         break;
      case 0xA5: /* SHLDv %cl,Gv,Ev */
         modrm = getIByte(delta);
         delta = dis_SHLRD_Gv_Ev (
                    sorb, delta, modrm, sz,
                    getIReg(1,R_ECX), False, /* not literal */
                    "%cl", True );
         break;

      case 0xAC: /* SHRDv imm8,Gv,Ev */
         modrm = getIByte(delta);
         d32   = delta + lengthAMode(delta);
         vex_sprintf(dis_buf, "$%d", getIByte(d32));
         delta = dis_SHLRD_Gv_Ev (
                    sorb, delta, modrm, sz,
                    mkU8(getIByte(d32)), True, /* literal */
                    dis_buf, False );
         break;
      case 0xAD: /* SHRDv %cl,Gv,Ev */
         modrm = getIByte(delta);
         delta = dis_SHLRD_Gv_Ev (
                    sorb, delta, modrm, sz,
                    getIReg(1,R_ECX), False, /* not literal */
                    "%cl", False );
         break;
15161
      /* =-=-=-=-=-=-=-=-=- SYSENTER -=-=-=-=-=-=-=-=-=-= */

      case 0x34:
         /* Simple implementation needing a long explanation.

            sysenter is a kind of syscall entry.  The key thing here
            is that the return address is not known -- that is
            something that is beyond Vex's knowledge.  So this IR
            forces a return to the scheduler, which can do what it
            likes to simulate the sysenter, but it MUST set this
            thread's guest_EIP field with the continuation address
            before resuming execution.  If that doesn't happen, the
            thread will jump to address zero, which is probably
            fatal.
         */

         /* Note where we are, so we can back up the guest to this
            point if the syscall needs to be restarted. */
         stmt( IRStmt_Put( OFFB_IP_AT_SYSCALL,
                           mkU32(guest_EIP_curr_instr) ) );
         jmp_lit(&dres, Ijk_Sys_sysenter, 0/*bogus next EIP value*/);
         vassert(dres.whatNext == Dis_StopHere);
         /* NOTE(review): unlike every other DIP in this switch, this
            string has no trailing "\n" -- looks like an oversight;
            confirm before changing debug output. */
         DIP("sysenter");
         break;
15186
      /* =-=-=-=-=-=-=-=-=- XADD -=-=-=-=-=-=-=-=-=-= */

      /* Exchange-and-add; pfx_lock is passed through so a LOCK prefix
         produces an atomic (IRCAS-based) translation. */
      case 0xC0: { /* XADD Gb,Eb */
         Bool decodeOK;
         delta = dis_xadd_G_E ( sorb, pfx_lock, 1, delta, &decodeOK );
         if (!decodeOK) goto decode_failure;
         break;
      }
      case 0xC1: { /* XADD Gv,Ev */
         Bool decodeOK;
         delta = dis_xadd_G_E ( sorb, pfx_lock, sz, delta, &decodeOK );
         if (!decodeOK) goto decode_failure;
         break;
      }
15201
      /* =-=-=-=-=-=-=-=-=- MMXery =-=-=-=-=-=-=-=-=-=-= */

      /* All MMX instructions funnel through dis_MMX; the opcode byte
         is re-read there, hence the delta-1 below. */

      case 0x71:
      case 0x72:
      case 0x73: /* PSLLgg/PSRAgg/PSRLgg mmxreg by imm8 */

      case 0x6E: /* MOVD (src)ireg-or-mem, (dst)mmxreg */
      case 0x7E: /* MOVD (src)mmxreg, (dst)ireg-or-mem */
      case 0x7F: /* MOVQ (src)mmxreg, (dst)mmxreg-or-mem */
      case 0x6F: /* MOVQ (src)mmxreg-or-mem, (dst)mmxreg */

      case 0xFC:
      case 0xFD:
      case 0xFE: /* PADDgg (src)mmxreg-or-mem, (dst)mmxreg */

      case 0xEC:
      case 0xED: /* PADDSgg (src)mmxreg-or-mem, (dst)mmxreg */

      case 0xDC:
      case 0xDD: /* PADDUSgg (src)mmxreg-or-mem, (dst)mmxreg */

      case 0xF8:
      case 0xF9:
      case 0xFA: /* PSUBgg (src)mmxreg-or-mem, (dst)mmxreg */

      case 0xE8:
      case 0xE9: /* PSUBSgg (src)mmxreg-or-mem, (dst)mmxreg */

      case 0xD8:
      case 0xD9: /* PSUBUSgg (src)mmxreg-or-mem, (dst)mmxreg */

      case 0xE5: /* PMULHW (src)mmxreg-or-mem, (dst)mmxreg */
      case 0xD5: /* PMULLW (src)mmxreg-or-mem, (dst)mmxreg */

      case 0xF5: /* PMADDWD (src)mmxreg-or-mem, (dst)mmxreg */

      case 0x74:
      case 0x75:
      case 0x76: /* PCMPEQgg (src)mmxreg-or-mem, (dst)mmxreg */

      case 0x64:
      case 0x65:
      case 0x66: /* PCMPGTgg (src)mmxreg-or-mem, (dst)mmxreg */

      case 0x6B: /* PACKSSDW (src)mmxreg-or-mem, (dst)mmxreg */
      case 0x63: /* PACKSSWB (src)mmxreg-or-mem, (dst)mmxreg */
      case 0x67: /* PACKUSWB (src)mmxreg-or-mem, (dst)mmxreg */

      case 0x68:
      case 0x69:
      case 0x6A: /* PUNPCKHgg (src)mmxreg-or-mem, (dst)mmxreg */

      case 0x60:
      case 0x61:
      case 0x62: /* PUNPCKLgg (src)mmxreg-or-mem, (dst)mmxreg */

      case 0xDB: /* PAND (src)mmxreg-or-mem, (dst)mmxreg */
      case 0xDF: /* PANDN (src)mmxreg-or-mem, (dst)mmxreg */
      case 0xEB: /* POR (src)mmxreg-or-mem, (dst)mmxreg */
      case 0xEF: /* PXOR (src)mmxreg-or-mem, (dst)mmxreg */

      case 0xF1: /* PSLLgg (src)mmxreg-or-mem, (dst)mmxreg */
      case 0xF2:
      case 0xF3:

      case 0xD1: /* PSRLgg (src)mmxreg-or-mem, (dst)mmxreg */
      case 0xD2:
      case 0xD3:

      case 0xE1: /* PSRAgg (src)mmxreg-or-mem, (dst)mmxreg */
      case 0xE2:
      {
         /* Remember where the opcode byte was, so that on a failed
            decode we can restore delta before bailing out. */
         Int  delta0    = delta-1;
         Bool decode_OK = False;

         /* If sz==2 this is SSE, and we assume sse idec has
            already spotted those cases by now. */
         if (sz != 4)
            goto decode_failure;

         delta = dis_MMX ( &decode_OK, sorb, sz, delta-1 );
         if (!decode_OK) {
            delta = delta0;
            goto decode_failure;
         }
         break;
      }

      case 0x0E: /* FEMMS */
      case 0x77: /* EMMS */
         if (sz != 4)
            goto decode_failure;
         do_EMMS_preamble();
         DIP("{f}emms\n");
         break;
15297
      /* =-=-=-=-=-=-=-=-=- SGDT and SIDT =-=-=-=-=-=-=-=-=-=-= */
      case 0x01: /* 0F 01 /0 -- SGDT */
                 /* 0F 01 /1 -- SIDT */
      {
         /* This is really revolting, but ... since each processor
            (core) only has one IDT and one GDT, just let the guest
            see it (pass-through semantics).  I can't see any way to
            construct a faked-up value, so don't bother to try. */
         /* The addressing mode is decoded before the register-form
            check; on the reg-form path the computed amode is simply
            discarded via decode_failure. */
         modrm = getUChar(delta);
         addr = disAMode ( &alen, sorb, delta, dis_buf );
         delta += alen;
         if (epartIsReg(modrm)) goto decode_failure;
         if (gregOfRM(modrm) != 0 && gregOfRM(modrm) != 1)
            goto decode_failure;
         switch (gregOfRM(modrm)) {
            case 0: DIP("sgdt %s\n", dis_buf); break;
            case 1: DIP("sidt %s\n", dis_buf); break;
            default: vassert(0); /*NOTREACHED*/
         }

         IRDirty* d = unsafeIRDirty_0_N (
                          0/*regparms*/,
                          "x86g_dirtyhelper_SxDT",
                          &x86g_dirtyhelper_SxDT,
                          mkIRExprVec_2( mkexpr(addr),
                                         mkU32(gregOfRM(modrm)) )
                      );
         /* declare we're writing memory: a 6-byte (16-bit limit +
            32-bit base) descriptor-table pseudo-descriptor. */
         d->mFx   = Ifx_Write;
         d->mAddr = mkexpr(addr);
         d->mSize = 6;
         stmt( IRStmt_Dirty(d) );
         break;
      }
15332
      case 0x05: /* AMD's syscall */
         /* As for sysenter/int: record the current EIP so the syscall
            can be backed up and restarted, then hand control to the
            scheduler via a Sys_syscall jump kind. */
         stmt( IRStmt_Put( OFFB_IP_AT_SYSCALL,
                           mkU32(guest_EIP_curr_instr) ) );
         jmp_lit(&dres, Ijk_Sys_syscall, ((Addr32)guest_EIP_bbstart)+delta);
         vassert(dres.whatNext == Dis_StopHere);
         DIP("syscall\n");
         break;

      /* =-=-=-=-=-=-=-=-=- unimp2 =-=-=-=-=-=-=-=-=-=-= */

      default:
         goto decode_failure;
   } /* switch (opc) for the 2-byte opcodes */
   goto decode_success;
   } /* case 0x0F: of primary opcode */
15348
   /* ------------------------ ??? ------------------------ */

   default:
   decode_failure:
   /* All decode failures end up here. */
   if (sigill_diag) {
      vex_printf("vex x86->IR: unhandled instruction bytes: "
                 "0x%x 0x%x 0x%x 0x%x\n",
                 (Int)getIByte(delta_start+0),
                 (Int)getIByte(delta_start+1),
                 (Int)getIByte(delta_start+2),
                 (Int)getIByte(delta_start+3) );
   }

   /* Tell the dispatcher that this insn cannot be decoded, and so has
      not been executed, and (is currently) the next to be executed.
      EIP should be up-to-date since it made so at the start of each
      insn, but nevertheless be paranoid and update it again right
      now. */
   stmt( IRStmt_Put( OFFB_EIP, mkU32(guest_EIP_curr_instr) ) );
   jmp_lit(&dres, Ijk_NoDecode, guest_EIP_curr_instr);
   vassert(dres.whatNext == Dis_StopHere);
   dres.len = 0;
   /* We also need to say that a CAS is not expected now, regardless
      of what it might have been set to at the start of the function,
      since the IR that we've emitted just above (to synthesis a
      SIGILL) does not involve any CAS, and presumably no other IR has
      been emitted for this (non-decoded) insn. */
   *expect_CAS = False;
   return dres;

   } /* switch (opc) for the main (primary) opcode switch. */

  decode_success:
   /* All decode successes end up here.  Store the continuation EIP
      appropriate to how this instruction's translation ended. */
   switch (dres.whatNext) {
      case Dis_Continue:
         stmt( IRStmt_Put( OFFB_EIP, mkU32(guest_EIP_bbstart + delta) ) );
         break;
      case Dis_ResteerU:
      case Dis_ResteerC:
         stmt( IRStmt_Put( OFFB_EIP, mkU32(dres.continueAt) ) );
         break;
      case Dis_StopHere:
         break;
      default:
         vassert(0);
   }

   DIP("\n");
   /* Number of guest-code bytes this instruction occupied. */
   dres.len = delta - delta_start;
   return dres;
}
15402
15403 #undef DIP
15404 #undef DIS
15405
15406
15407 /*------------------------------------------------------------*/
15408 /*--- Top-level fn ---*/
15409 /*------------------------------------------------------------*/
15410
15411 /* Disassemble a single instruction into IR. The instruction
15412 is located in host memory at &guest_code[delta]. */
15413
disInstr_X86(IRSB * irsb_IN,Bool (* resteerOkFn)(void *,Addr64),Bool resteerCisOk,void * callback_opaque,UChar * guest_code_IN,Long delta,Addr64 guest_IP,VexArch guest_arch,VexArchInfo * archinfo,VexAbiInfo * abiinfo,Bool host_bigendian_IN,Bool sigill_diag_IN)15414 DisResult disInstr_X86 ( IRSB* irsb_IN,
15415 Bool (*resteerOkFn) ( void*, Addr64 ),
15416 Bool resteerCisOk,
15417 void* callback_opaque,
15418 UChar* guest_code_IN,
15419 Long delta,
15420 Addr64 guest_IP,
15421 VexArch guest_arch,
15422 VexArchInfo* archinfo,
15423 VexAbiInfo* abiinfo,
15424 Bool host_bigendian_IN,
15425 Bool sigill_diag_IN )
15426 {
15427 Int i, x1, x2;
15428 Bool expect_CAS, has_CAS;
15429 DisResult dres;
15430
15431 /* Set globals (see top of this file) */
15432 vassert(guest_arch == VexArchX86);
15433 guest_code = guest_code_IN;
15434 irsb = irsb_IN;
15435 host_is_bigendian = host_bigendian_IN;
15436 guest_EIP_curr_instr = (Addr32)guest_IP;
15437 guest_EIP_bbstart = (Addr32)toUInt(guest_IP - delta);
15438
15439 x1 = irsb_IN->stmts_used;
15440 expect_CAS = False;
15441 dres = disInstr_X86_WRK ( &expect_CAS, resteerOkFn,
15442 resteerCisOk,
15443 callback_opaque,
15444 delta, archinfo, abiinfo, sigill_diag_IN );
15445 x2 = irsb_IN->stmts_used;
15446 vassert(x2 >= x1);
15447
15448 /* See comment at the top of disInstr_X86_WRK for meaning of
15449 expect_CAS. Here, we (sanity-)check for the presence/absence of
15450 IRCAS as directed by the returned expect_CAS value. */
15451 has_CAS = False;
15452 for (i = x1; i < x2; i++) {
15453 if (irsb_IN->stmts[i]->tag == Ist_CAS)
15454 has_CAS = True;
15455 }
15456
15457 if (expect_CAS != has_CAS) {
15458 /* inconsistency detected. re-disassemble the instruction so as
15459 to generate a useful error message; then assert. */
15460 vex_traceflags |= VEX_TRACE_FE;
15461 dres = disInstr_X86_WRK ( &expect_CAS, resteerOkFn,
15462 resteerCisOk,
15463 callback_opaque,
15464 delta, archinfo, abiinfo, sigill_diag_IN );
15465 for (i = x1; i < x2; i++) {
15466 vex_printf("\t\t");
15467 ppIRStmt(irsb_IN->stmts[i]);
15468 vex_printf("\n");
15469 }
15470 /* Failure of this assertion is serious and denotes a bug in
15471 disInstr. */
15472 vpanic("disInstr_X86: inconsistency in LOCK prefix handling");
15473 }
15474
15475 return dres;
15476 }
15477
15478
15479 /*--------------------------------------------------------------------*/
15480 /*--- end guest_x86_toIR.c ---*/
15481 /*--------------------------------------------------------------------*/
15482