1 
2 /*---------------------------------------------------------------*/
3 /*--- begin                                   host_x86_isel.c ---*/
4 /*---------------------------------------------------------------*/
5 
6 /*
7    This file is part of Valgrind, a dynamic binary instrumentation
8    framework.
9 
10    Copyright (C) 2004-2015 OpenWorks LLP
11       info@open-works.net
12 
13    This program is free software; you can redistribute it and/or
14    modify it under the terms of the GNU General Public License as
15    published by the Free Software Foundation; either version 2 of the
16    License, or (at your option) any later version.
17 
18    This program is distributed in the hope that it will be useful, but
19    WITHOUT ANY WARRANTY; without even the implied warranty of
20    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21    General Public License for more details.
22 
23    You should have received a copy of the GNU General Public License
24    along with this program; if not, write to the Free Software
25    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
26    02110-1301, USA.
27 
28    The GNU General Public License is contained in the file COPYING.
29 
30    Neither the names of the U.S. Department of Energy nor the
31    University of California nor the names of its contributors may be
32    used to endorse or promote products derived from this software
33    without prior written permission.
34 */
35 
36 #include "libvex_basictypes.h"
37 #include "libvex_ir.h"
38 #include "libvex.h"
39 
40 #include "ir_match.h"
41 #include "main_util.h"
42 #include "main_globals.h"
43 #include "host_generic_regs.h"
44 #include "host_generic_simd64.h"
45 #include "host_generic_simd128.h"
46 #include "host_x86_defs.h"
47 
48 /* TODO 21 Apr 2005:
49 
50    -- (Really an assembler issue) don't emit CMov32 as a cmov
51       insn, since that's expensive on P4 and conditional branch
52       is cheaper if (as we expect) the condition is highly predictable
53 
54    -- preserve xmm registers across function calls (by declaring them
55       as trashed by call insns)
56 
57    -- preserve x87 ST stack discipline across function calls.  Sigh.
58 
59    -- Check doHelperCall: if a call is conditional, we cannot safely
60       compute any regparm args directly to registers.  Hence, the
61       fast-regparm marshalling should be restricted to unconditional
62       calls only.
63 */
64 
65 /*---------------------------------------------------------*/
66 /*--- x87 control word stuff                            ---*/
67 /*---------------------------------------------------------*/
68 
69 /* Vex-generated code expects to run with the FPU set as follows: all
70    exceptions masked, round-to-nearest, precision = 53 bits.  This
71    corresponds to a FPU control word value of 0x027F.
72 
73    Similarly the SSE control word (%mxcsr) should be 0x1F80.
74 
75    %fpucw and %mxcsr should have these values on entry to
76    Vex-generated code, and those values should be unchanged
77    at exit.
78 */
79 
80 #define DEFAULT_FPUCW 0x027F
81 
82 /* debugging only, do not use */
83 /* define DEFAULT_FPUCW 0x037F */
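/* For reference, a rough decoding of DEFAULT_FPUCW in the x87
   control-word layout: bits 5..0 mask all six FP exceptions, bits 9..8
   = 10b select 53-bit (double) precision, and bits 11..10 = 00b select
   round-to-nearest.  The debug value 0x037F differs only in selecting
   64-bit (extended) precision. */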
84 
85 
86 /*---------------------------------------------------------*/
87 /*--- misc helpers                                      ---*/
88 /*---------------------------------------------------------*/
89 
90 /* These are duplicated in guest-x86/toIR.c */
91 static IRExpr* unop ( IROp op, IRExpr* a )
92 {
93    return IRExpr_Unop(op, a);
94 }
95 
96 static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
97 {
98    return IRExpr_Binop(op, a1, a2);
99 }
100 
101 static IRExpr* bind ( Int binder )
102 {
103    return IRExpr_Binder(binder);
104 }
105 
106 static Bool isZeroU8 ( IRExpr* e )
107 {
108    return e->tag == Iex_Const
109           && e->Iex.Const.con->tag == Ico_U8
110           && e->Iex.Const.con->Ico.U8 == 0;
111 }
112 
113 static Bool isZeroU32 ( IRExpr* e )
114 {
115    return e->tag == Iex_Const
116           && e->Iex.Const.con->tag == Ico_U32
117           && e->Iex.Const.con->Ico.U32 == 0;
118 }
119 
120 //static Bool isZeroU64 ( IRExpr* e )
121 //{
122 //   return e->tag == Iex_Const
123 //          && e->Iex.Const.con->tag == Ico_U64
124 //          && e->Iex.Const.con->Ico.U64 == 0ULL;
125 //}
126 
127 
128 /*---------------------------------------------------------*/
129 /*--- ISelEnv                                           ---*/
130 /*---------------------------------------------------------*/
131 
132 /* This carries around:
133 
134    - A mapping from IRTemp to IRType, giving the type of any IRTemp we
135      might encounter.  This is computed before insn selection starts,
136      and does not change.
137 
138    - A mapping from IRTemp to HReg.  This tells the insn selector
139      which virtual register(s) are associated with each IRTemp
140      temporary.  This is computed before insn selection starts, and
141      does not change.  We expect this mapping to map precisely the
142      same set of IRTemps as the type mapping does.
143 
144         - vregmap   holds the primary register for the IRTemp.
145         - vregmapHI is only used for 64-bit integer-typed
146              IRTemps.  It holds the identity of a second
147              32-bit virtual HReg, which holds the high half
148              of the value.
149 
150    - The code array, that is, the insns selected so far.
151 
152    - A counter, for generating new virtual registers.
153 
154    - The host subarchitecture we are selecting insns for.
155      This is set at the start and does not change.
156 
157    - A Bool for indicating whether we may generate chain-me
158      instructions for control flow transfers, or whether we must use
159      XAssisted.
160 
161    - The maximum guest address of any guest insn in this block.
162      Actually, the address of the highest-addressed byte from any insn
163      in this block.  It is set at the start and does not change.  This is
164      used for detecting jumps which are definitely forward-edges from
165      this block, and therefore can be made (chained) to the fast entry
166      point of the destination, thereby avoiding the destination's
167      event check.
168 
169    Note, this is all (well, mostly) host-independent.
170 */
171 
172 typedef
173    struct {
174       /* Constant -- set at the start and do not change. */
175       IRTypeEnv*   type_env;
176 
177       HReg*        vregmap;
178       HReg*        vregmapHI;
179       Int          n_vregmap;
180 
181       UInt         hwcaps;
182 
183       Bool         chainingAllowed;
184       Addr32       max_ga;
185 
186       /* These are modified as we go along. */
187       HInstrArray* code;
188       Int          vreg_ctr;
189    }
190    ISelEnv;
191 
192 
193 static HReg lookupIRTemp ( ISelEnv* env, IRTemp tmp )
194 {
195    vassert(tmp >= 0);
196    vassert(tmp < env->n_vregmap);
197    return env->vregmap[tmp];
198 }
199 
200 static void lookupIRTemp64 ( HReg* vrHI, HReg* vrLO, ISelEnv* env, IRTemp tmp )
201 {
202    vassert(tmp >= 0);
203    vassert(tmp < env->n_vregmap);
204    vassert(! hregIsInvalid(env->vregmapHI[tmp]));
205    *vrLO = env->vregmap[tmp];
206    *vrHI = env->vregmapHI[tmp];
207 }
208 
209 static void addInstr ( ISelEnv* env, X86Instr* instr )
210 {
211    addHInstr(env->code, instr);
212    if (vex_traceflags & VEX_TRACE_VCODE) {
213       ppX86Instr(instr, False);
214       vex_printf("\n");
215    }
216 }
217 
218 static HReg newVRegI ( ISelEnv* env )
219 {
220    HReg reg = mkHReg(True/*virtual reg*/, HRcInt32, 0/*enc*/, env->vreg_ctr);
221    env->vreg_ctr++;
222    return reg;
223 }
224 
225 static HReg newVRegF ( ISelEnv* env )
226 {
227    HReg reg = mkHReg(True/*virtual reg*/, HRcFlt64, 0/*enc*/, env->vreg_ctr);
228    env->vreg_ctr++;
229    return reg;
230 }
231 
232 static HReg newVRegV ( ISelEnv* env )
233 {
234    HReg reg = mkHReg(True/*virtual reg*/, HRcVec128, 0/*enc*/, env->vreg_ctr);
235    env->vreg_ctr++;
236    return reg;
237 }
238 
239 
240 /*---------------------------------------------------------*/
241 /*--- ISEL: Forward declarations                        ---*/
242 /*---------------------------------------------------------*/
243 
244 /* These are organised as iselXXX and iselXXX_wrk pairs.  The
245    iselXXX_wrk functions do the real work, but are not to be
246    called directly.  For each XXX, iselXXX calls its iselXXX_wrk
247    counterpart and then checks that all returned registers are
248    virtual.
249 */
250 static X86RMI*     iselIntExpr_RMI_wrk ( ISelEnv* env, IRExpr* e );
251 static X86RMI*     iselIntExpr_RMI     ( ISelEnv* env, IRExpr* e );
252 
253 static X86RI*      iselIntExpr_RI_wrk ( ISelEnv* env, IRExpr* e );
254 static X86RI*      iselIntExpr_RI     ( ISelEnv* env, IRExpr* e );
255 
256 static X86RM*      iselIntExpr_RM_wrk ( ISelEnv* env, IRExpr* e );
257 static X86RM*      iselIntExpr_RM     ( ISelEnv* env, IRExpr* e );
258 
259 static HReg        iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e );
260 static HReg        iselIntExpr_R     ( ISelEnv* env, IRExpr* e );
261 
262 static X86AMode*   iselIntExpr_AMode_wrk ( ISelEnv* env, IRExpr* e );
263 static X86AMode*   iselIntExpr_AMode     ( ISelEnv* env, IRExpr* e );
264 
265 static void        iselInt64Expr_wrk ( HReg* rHi, HReg* rLo,
266                                        ISelEnv* env, IRExpr* e );
267 static void        iselInt64Expr     ( HReg* rHi, HReg* rLo,
268                                        ISelEnv* env, IRExpr* e );
269 
270 static X86CondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e );
271 static X86CondCode iselCondCode     ( ISelEnv* env, IRExpr* e );
272 
273 static HReg        iselDblExpr_wrk ( ISelEnv* env, IRExpr* e );
274 static HReg        iselDblExpr     ( ISelEnv* env, IRExpr* e );
275 
276 static HReg        iselFltExpr_wrk ( ISelEnv* env, IRExpr* e );
277 static HReg        iselFltExpr     ( ISelEnv* env, IRExpr* e );
278 
279 static HReg        iselVecExpr_wrk ( ISelEnv* env, IRExpr* e );
280 static HReg        iselVecExpr     ( ISelEnv* env, IRExpr* e );
281 
282 
283 /*---------------------------------------------------------*/
284 /*--- ISEL: Misc helpers                                ---*/
285 /*---------------------------------------------------------*/
286 
287 /* Make an integer reg-reg move. */
288 
289 static X86Instr* mk_iMOVsd_RR ( HReg src, HReg dst )
290 {
291    vassert(hregClass(src) == HRcInt32);
292    vassert(hregClass(dst) == HRcInt32);
293    return X86Instr_Alu32R(Xalu_MOV, X86RMI_Reg(src), dst);
294 }
295 
296 
297 /* Make a vector reg-reg move. */
298 
299 static X86Instr* mk_vMOVsd_RR ( HReg src, HReg dst )
300 {
301    vassert(hregClass(src) == HRcVec128);
302    vassert(hregClass(dst) == HRcVec128);
303    return X86Instr_SseReRg(Xsse_MOV, src, dst);
304 }
305 
306 /* Advance/retreat %esp by n. */
307 
308 static void add_to_esp ( ISelEnv* env, Int n )
309 {
310    vassert(n > 0 && n < 256 && (n%4) == 0);
311    addInstr(env,
312             X86Instr_Alu32R(Xalu_ADD, X86RMI_Imm(n), hregX86_ESP()));
313 }
314 
315 static void sub_from_esp ( ISelEnv* env, Int n )
316 {
317    vassert(n > 0 && n < 256 && (n%4) == 0);
318    addInstr(env,
319             X86Instr_Alu32R(Xalu_SUB, X86RMI_Imm(n), hregX86_ESP()));
320 }
321 
322 
323 /* Given an amode, return one which references 4 bytes further
324    along. */
325 
326 static X86AMode* advance4 ( X86AMode* am )
327 {
328    X86AMode* am4 = dopyX86AMode(am);
329    switch (am4->tag) {
330       case Xam_IRRS:
331          am4->Xam.IRRS.imm += 4; break;
332       case Xam_IR:
333          am4->Xam.IR.imm += 4; break;
334       default:
335          vpanic("advance4(x86,host)");
336    }
337    return am4;
338 }
339 
340 
341 /* Push an arg onto the host stack, in preparation for a call to a
342    helper function of some kind.  Returns the number of 32-bit words
343    pushed.  If we encounter an IRExpr_VECRET() then we expect that
344    r_vecRetAddr will be a valid register, that holds the relevant
345    address.
346 */
347 static Int pushArg ( ISelEnv* env, IRExpr* arg, HReg r_vecRetAddr )
348 {
349    if (UNLIKELY(arg->tag == Iex_VECRET)) {
350       vassert(0); //ATC
351       vassert(!hregIsInvalid(r_vecRetAddr));
352       addInstr(env, X86Instr_Push(X86RMI_Reg(r_vecRetAddr)));
353       return 1;
354    }
355    if (UNLIKELY(arg->tag == Iex_BBPTR)) {
356       addInstr(env, X86Instr_Push(X86RMI_Reg(hregX86_EBP())));
357       return 1;
358    }
359    /* Else it's a "normal" expression. */
360    IRType arg_ty = typeOfIRExpr(env->type_env, arg);
361    if (arg_ty == Ity_I32) {
362       addInstr(env, X86Instr_Push(iselIntExpr_RMI(env, arg)));
363       return 1;
364    } else
365    if (arg_ty == Ity_I64) {
366       HReg rHi, rLo;
367       iselInt64Expr(&rHi, &rLo, env, arg);
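      /* Push the high half first, then the low half, so that after both
         pushes the low 32 bits end up at the lower address (0(%esp)) and
         the high 32 bits at 4(%esp) -- the little-endian in-memory
         layout expected for a 64-bit argument. */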
368       addInstr(env, X86Instr_Push(X86RMI_Reg(rHi)));
369       addInstr(env, X86Instr_Push(X86RMI_Reg(rLo)));
370       return 2;
371    }
372    ppIRExpr(arg);
373    vpanic("pushArg(x86): can't handle arg of this type");
374 }
375 
376 
377 /* Complete the call to a helper function, by calling the
378    helper and clearing the args off the stack. */
379 
380 static
381 void callHelperAndClearArgs ( ISelEnv* env, X86CondCode cc,
382                               IRCallee* cee, Int n_arg_ws,
383                               RetLoc rloc )
384 {
385    /* Complication.  Need to decide which reg to use as the fn address
386       pointer, in a way that doesn't trash regparm-passed
387       parameters. */
388    vassert(sizeof(void*) == 4);
389 
390    addInstr(env, X86Instr_Call( cc, (Addr)cee->addr,
391                                 cee->regparms, rloc));
392    if (n_arg_ws > 0)
393       add_to_esp(env, 4*n_arg_ws);
394 }
395 
396 
397 /* Used only in doHelperCall.  See big comment in doHelperCall re
398    handling of regparm args.  This function figures out whether
399    evaluation of an expression might require use of a fixed register.
400    If in doubt return True (safe but suboptimal).
401 */
402 static
403 Bool mightRequireFixedRegs ( IRExpr* e )
404 {
405    if (UNLIKELY(is_IRExpr_VECRET_or_BBPTR(e))) {
406       // These are always "safe" -- either a copy of %esp in some
407       // arbitrary vreg, or a copy of %ebp, respectively.
408       return False;
409    }
410    /* Else it's a "normal" expression. */
411    switch (e->tag) {
412       case Iex_RdTmp: case Iex_Const: case Iex_Get:
413          return False;
414       default:
415          return True;
416    }
417 }
418 
419 
420 /* Do a complete function call.  |guard| is a Ity_Bit expression
421    indicating whether or not the call happens.  If guard==NULL, the
422    call is unconditional.  |retloc| is set to indicate where the
423    return value is after the call.  The caller (of this fn) must
424    generate code to add |stackAdjustAfterCall| to the stack pointer
425    after the call is done. */
426 
427 static
428 void doHelperCall ( /*OUT*/UInt*   stackAdjustAfterCall,
429                     /*OUT*/RetLoc* retloc,
430                     ISelEnv* env,
431                     IRExpr* guard,
432                     IRCallee* cee, IRType retTy, IRExpr** args )
433 {
434    X86CondCode cc;
435    HReg        argregs[3];
436    HReg        tmpregs[3];
437    Bool        danger;
438    Int         not_done_yet, n_args, n_arg_ws, stack_limit,
439                i, argreg, argregX;
440 
441    /* Set default returns.  We'll update them later if needed. */
442    *stackAdjustAfterCall = 0;
443    *retloc               = mk_RetLoc_INVALID();
444 
445    /* These are used for cross-checking that IR-level constraints on
446       the use of Iex_VECRET and Iex_BBPTR are observed. */
447    UInt nVECRETs = 0;
448    UInt nBBPTRs  = 0;
449 
450    /* Marshal args for a call, do the call, and clear the stack.
451       Complexities to consider:
452 
453       * The return type can be I{64,32,16,8} or V128.  In the V128
454         case, it is expected that |args| will contain the special
455         node IRExpr_VECRET(), in which case this routine generates
456         code to allocate space on the stack for the vector return
457         value.  Since we are not passing any scalars on the stack, it
458         is enough to preallocate the return space before marshalling
459         any arguments, in this case.
460 
461         |args| may also contain IRExpr_BBPTR(), in which case the
462         value in %ebp is passed as the corresponding argument.
463 
464       * If the callee claims regparmness of 1, 2 or 3, we must pass the
465         first 1, 2 or 3 args in registers (EAX, EDX, and ECX
466         respectively).  To keep things relatively simple, only args of
467         type I32 may be passed as regparms -- just bomb out if anything
468         else turns up.  Clearly this depends on the front ends not
469         trying to pass any other types as regparms.
470    */
471 
472    /* 16 Nov 2004: the regparm handling is complicated by the
473       following problem.
474 
475       Consider a call to a function with two regparm parameters:
476       f(e1,e2).  We need to compute e1 into %eax and e2 into %edx.
477       Suppose code is first generated to compute e1 into %eax.  Then,
478       code is generated to compute e2 into %edx.  Unfortunately, if
479       the latter code sequence uses %eax, it will trash the value of
480       e1 computed by the former sequence.  This could happen if (for
481       example) e2 itself involved a function call.  In the code below,
482       args are evaluated right-to-left, not left-to-right, but the
483       principle and the problem are the same.
484 
485       One solution is to compute all regparm-bound args into vregs
486       first, and once they are all done, move them to the relevant
487       real regs.  This always gives correct code, but it also gives
488       a bunch of vreg-to-rreg moves which are usually redundant but
489       are hard for the register allocator to get rid of.
490 
491       A compromise is to first examine all regparm'd argument
492       expressions.  If they are all so simple that it is clear
493       they will be evaluated without use of any fixed registers,
494       use the old compute-directly-to-fixed-target scheme.  If not,
495       be safe and use the via-vregs scheme.
496 
497       Note this requires being able to examine an expression and
498       determine whether or not evaluation of it might use a fixed
499       register.  That requires knowledge of how the rest of this
500       insn selector works.  Currently just the following 3 are
501       regarded as safe -- hopefully they cover the majority of
502       arguments in practice: IRExpr_Tmp IRExpr_Const IRExpr_Get.
503    */
504    vassert(cee->regparms >= 0 && cee->regparms <= 3);
505 
506    /* Count the number of args and also the VECRETs */
507    n_args = n_arg_ws = 0;
508    while (args[n_args]) {
509       IRExpr* arg = args[n_args];
510       n_args++;
511       if (UNLIKELY(arg->tag == Iex_VECRET)) {
512          nVECRETs++;
513       } else if (UNLIKELY(arg->tag == Iex_BBPTR)) {
514          nBBPTRs++;
515       }
516    }
517 
518    /* If this fails, the IR is ill-formed */
519    vassert(nBBPTRs == 0 || nBBPTRs == 1);
520 
521    /* If we have a VECRET, allocate space on the stack for the return
522       value, and record the stack pointer after that. */
523    HReg r_vecRetAddr = INVALID_HREG;
524    if (nVECRETs == 1) {
525       vassert(retTy == Ity_V128 || retTy == Ity_V256);
526       vassert(retTy != Ity_V256); // we don't handle that yet (if ever)
527       r_vecRetAddr = newVRegI(env);
528       sub_from_esp(env, 16);
529       addInstr(env, mk_iMOVsd_RR( hregX86_ESP(), r_vecRetAddr ));
530    } else {
531       // If either of these fail, the IR is ill-formed
532       vassert(retTy != Ity_V128 && retTy != Ity_V256);
533       vassert(nVECRETs == 0);
534    }
535 
536    not_done_yet = n_args;
537 
538    stack_limit = cee->regparms;
539 
540    /* ------ BEGIN marshall all arguments ------ */
541 
542    /* Push (R to L) the stack-passed args, [n_args-1 .. stack_limit] */
543    for (i = n_args-1; i >= stack_limit; i--) {
544       n_arg_ws += pushArg(env, args[i], r_vecRetAddr);
545       not_done_yet--;
546    }
547 
548    /* args [stack_limit-1 .. 0] and possibly %ebp are to be passed in
549       registers. */
550 
551    if (cee->regparms > 0) {
552 
553       /* ------ BEGIN deal with regparms ------ */
554 
555       /* deal with regparms, not forgetting %ebp if needed. */
556       argregs[0] = hregX86_EAX();
557       argregs[1] = hregX86_EDX();
558       argregs[2] = hregX86_ECX();
559       tmpregs[0] = tmpregs[1] = tmpregs[2] = INVALID_HREG;
560 
561       argreg = cee->regparms;
562 
563       /* In keeping with big comment above, detect potential danger
564          and use the via-vregs scheme if needed. */
565       danger = False;
566       for (i = stack_limit-1; i >= 0; i--) {
567          if (mightRequireFixedRegs(args[i])) {
568             danger = True;
569             break;
570          }
571       }
572 
573       if (danger) {
574 
575          /* Move via temporaries */
576          argregX = argreg;
577          for (i = stack_limit-1; i >= 0; i--) {
578 
579             if (0) {
580                vex_printf("x86 host: register param is complex: ");
581                ppIRExpr(args[i]);
582                vex_printf("\n");
583             }
584 
585             IRExpr* arg = args[i];
586             argreg--;
587             vassert(argreg >= 0);
588             if (UNLIKELY(arg->tag == Iex_VECRET)) {
589                vassert(0); //ATC
590             }
591             else if (UNLIKELY(arg->tag == Iex_BBPTR)) {
592                vassert(0); //ATC
593             } else {
594                vassert(typeOfIRExpr(env->type_env, arg) == Ity_I32);
595                tmpregs[argreg] = iselIntExpr_R(env, arg);
596             }
597             not_done_yet--;
598          }
599          for (i = stack_limit-1; i >= 0; i--) {
600             argregX--;
601             vassert(argregX >= 0);
602             addInstr( env, mk_iMOVsd_RR( tmpregs[argregX], argregs[argregX] ) );
603          }
604 
605       } else {
606          /* It's safe to compute all regparm args directly into their
607             target registers. */
608          for (i = stack_limit-1; i >= 0; i--) {
609             IRExpr* arg = args[i];
610             argreg--;
611             vassert(argreg >= 0);
612             if (UNLIKELY(arg->tag == Iex_VECRET)) {
613                vassert(!hregIsInvalid(r_vecRetAddr));
614                addInstr(env, X86Instr_Alu32R(Xalu_MOV,
615                                              X86RMI_Reg(r_vecRetAddr),
616                                              argregs[argreg]));
617             }
618             else if (UNLIKELY(arg->tag == Iex_BBPTR)) {
619                vassert(0); //ATC
620             } else {
621                vassert(typeOfIRExpr(env->type_env, arg) == Ity_I32);
622                addInstr(env, X86Instr_Alu32R(Xalu_MOV,
623                                              iselIntExpr_RMI(env, arg),
624                                              argregs[argreg]));
625             }
626             not_done_yet--;
627          }
628 
629       }
630 
631       /* ------ END deal with regparms ------ */
632 
633    }
634 
635    vassert(not_done_yet == 0);
636 
637    /* ------ END marshall all arguments ------ */
638 
639    /* Now we can compute the condition.  We can't do it earlier
640       because the argument computations could trash the condition
641       codes.  Be a bit clever to handle the common case where the
642       guard is 1:Bit. */
643    cc = Xcc_ALWAYS;
644    if (guard) {
645       if (guard->tag == Iex_Const
646           && guard->Iex.Const.con->tag == Ico_U1
647           && guard->Iex.Const.con->Ico.U1 == True) {
648          /* unconditional -- do nothing */
649       } else {
650          cc = iselCondCode( env, guard );
651       }
652    }
653 
654    /* Do final checks, set the return values, and generate the call
655       instruction proper. */
656    vassert(*stackAdjustAfterCall == 0);
657    vassert(is_RetLoc_INVALID(*retloc));
658    switch (retTy) {
659          case Ity_INVALID:
660             /* Function doesn't return a value. */
661             *retloc = mk_RetLoc_simple(RLPri_None);
662             break;
663          case Ity_I64:
664             *retloc = mk_RetLoc_simple(RLPri_2Int);
665             break;
666          case Ity_I32: case Ity_I16: case Ity_I8:
667             *retloc = mk_RetLoc_simple(RLPri_Int);
668             break;
669          case Ity_V128:
670             *retloc = mk_RetLoc_spRel(RLPri_V128SpRel, 0);
671             *stackAdjustAfterCall = 16;
672             break;
673          case Ity_V256:
674             vassert(0); // ATC
675             *retloc = mk_RetLoc_spRel(RLPri_V256SpRel, 0);
676             *stackAdjustAfterCall = 32;
677             break;
678          default:
679             /* IR can denote other possible return types, but we don't
680                handle those here. */
681            vassert(0);
682    }
683 
684    /* Finally, generate the call itself.  This needs the *retloc value
685       set in the switch above, which is why it's at the end. */
686    callHelperAndClearArgs( env, cc, cee, n_arg_ws, *retloc );
687 }
688 
689 
690 /* Given a guest-state array descriptor, an index expression and a
691    bias, generate an X86AMode holding the relevant guest state
692    offset. */
693 
694 static
695 X86AMode* genGuestArrayOffset ( ISelEnv* env, IRRegArray* descr,
696                                 IRExpr* off, Int bias )
697 {
698    HReg tmp, roff;
699    Int  elemSz = sizeofIRType(descr->elemTy);
700    Int  nElems = descr->nElems;
701    Int  shift  = 0;
702 
703    /* throw out any cases not generated by an x86 front end.  In
704       theory there might be a day where we need to handle them -- if
705       we ever run non-x86-guest on x86 host. */
706 
707    if (nElems != 8)
708       vpanic("genGuestArrayOffset(x86 host)(1)");
709 
710    switch (elemSz) {
711       case 1:  shift = 0; break;
712       case 4:  shift = 2; break;
713       case 8:  shift = 3; break;
714       default: vpanic("genGuestArrayOffset(x86 host)(2)");
715    }
716 
717    /* Compute off into a reg, %off.  Then return:
718 
719          movl %off, %tmp
720          addl $bias, %tmp  (if bias != 0)
721          andl %tmp, 7
722          ... base(%ebp, %tmp, shift) ...
723    */
724    tmp  = newVRegI(env);
725    roff = iselIntExpr_R(env, off);
726    addInstr(env, mk_iMOVsd_RR(roff, tmp));
727    if (bias != 0) {
728       addInstr(env,
729                X86Instr_Alu32R(Xalu_ADD, X86RMI_Imm(bias), tmp));
730    }
731    addInstr(env,
732             X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(7), tmp));
733    return
734       X86AMode_IRRS( descr->base, hregX86_EBP(), tmp, shift );
735 }
736 
737 
738 /* Mess with the FPU's rounding mode: set to the default rounding mode
739    (DEFAULT_FPUCW). */
740 static
741 void set_FPU_rounding_default ( ISelEnv* env )
742 {
743    /* pushl $DEFAULT_FPUCW
744       fldcw 0(%esp)
745       addl $4, %esp
746    */
747    X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
748    addInstr(env, X86Instr_Push(X86RMI_Imm(DEFAULT_FPUCW)));
749    addInstr(env, X86Instr_FpLdCW(zero_esp));
750    add_to_esp(env, 4);
751 }
752 
753 
754 /* Mess with the FPU's rounding mode: 'mode' is an I32-typed
755    expression denoting a value in the range 0 .. 3, indicating a round
756    mode encoded as per type IRRoundingMode.  Set the x87 FPU to have
757    the same rounding.
758 */
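/* Note: the IRRoundingMode encoding (0 = nearest, 1 = towards -infinity,
   2 = towards +infinity, 3 = towards zero) matches the x87 RC field
   encoding, so -- assuming that correspondence -- the mode value can
   simply be masked to 2 bits and shifted into bits 11:10 of the control
   word, which is what the code below does. */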
759 static
760 void set_FPU_rounding_mode ( ISelEnv* env, IRExpr* mode )
761 {
762    HReg rrm  = iselIntExpr_R(env, mode);
763    HReg rrm2 = newVRegI(env);
764    X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
765 
766    /* movl  %rrm, %rrm2
767       andl  $3, %rrm2   -- shouldn't be needed; paranoia
768       shll  $10, %rrm2
769       orl   $DEFAULT_FPUCW, %rrm2
770       pushl %rrm2
771       fldcw 0(%esp)
772       addl  $4, %esp
773    */
774    addInstr(env, mk_iMOVsd_RR(rrm, rrm2));
775    addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(3), rrm2));
776    addInstr(env, X86Instr_Sh32(Xsh_SHL, 10, rrm2));
777    addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Imm(DEFAULT_FPUCW), rrm2));
778    addInstr(env, X86Instr_Push(X86RMI_Reg(rrm2)));
779    addInstr(env, X86Instr_FpLdCW(zero_esp));
780    add_to_esp(env, 4);
781 }
782 
783 
784 /* Generate !src into a new vector register, and be sure that the code
785    is SSE1 compatible.  Amazing that Intel doesn't offer a less crappy
786    way to do this.
787 */
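/* A sketch of the emitted sequence (SSE1 only):
      xorps   %dst,%dst     -- dst = 0, so the compare sees no junk/NaNs
      cmpeqps %dst,%dst     -- 0 == 0 in every lane, so dst = all ones
      xorps   %src,%dst     -- dst = ~src
*/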
788 static HReg do_sse_Not128 ( ISelEnv* env, HReg src )
789 {
790    HReg dst = newVRegV(env);
791    /* Set dst to zero.  If dst contains a NaN then all hell might
792       break loose after the comparison.  So, first zero it. */
793    addInstr(env, X86Instr_SseReRg(Xsse_XOR, dst, dst));
794    /* And now make it all 1s ... */
795    addInstr(env, X86Instr_Sse32Fx4(Xsse_CMPEQF, dst, dst));
796    /* Finally, xor 'src' into it. */
797    addInstr(env, X86Instr_SseReRg(Xsse_XOR, src, dst));
798    /* Doesn't that just totally suck? */
799    return dst;
800 }
801 
802 
803 /* Round an x87 FPU value to 53-bit-mantissa precision, to be used
804    after most non-simple FPU operations (simple = +, -, *, / and
805    sqrt).
806 
807    This could be done a lot more efficiently if needed, by loading
808    zero and adding it to the value to be rounded (fldz ; faddp?).
809 */
810 static void roundToF64 ( ISelEnv* env, HReg reg )
811 {
812    X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
813    sub_from_esp(env, 8);
814    addInstr(env, X86Instr_FpLdSt(False/*store*/, 8, reg, zero_esp));
815    addInstr(env, X86Instr_FpLdSt(True/*load*/, 8, reg, zero_esp));
816    add_to_esp(env, 8);
817 }
818 
819 
820 /*---------------------------------------------------------*/
821 /*--- ISEL: Integer expressions (32/16/8 bit)           ---*/
822 /*---------------------------------------------------------*/
823 
824 /* Select insns for an integer-typed expression, and add them to the
825    code list.  Return a reg holding the result.  This reg will be a
826    virtual register.  THE RETURNED REG MUST NOT BE MODIFIED.  If you
827    want to modify it, ask for a new vreg, copy it in there, and modify
828    the copy.  The register allocator will do its best to map both
829    vregs to the same real register, so the copies will often disappear
830    later in the game.
831 
832    This should handle expressions of 32, 16 and 8-bit type.  All
833    results are returned in a 32-bit register.  For 16- and 8-bit
834    expressions, the upper 16/24 bits are arbitrary, so you should mask
835    or sign extend partial values if necessary.
836 */
837 
838 static HReg iselIntExpr_R ( ISelEnv* env, IRExpr* e )
839 {
840    HReg r = iselIntExpr_R_wrk(env, e);
841    /* sanity checks ... */
842 #  if 0
843    vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
844 #  endif
845    vassert(hregClass(r) == HRcInt32);
846    vassert(hregIsVirtual(r));
847    return r;
848 }
849 
850 /* DO NOT CALL THIS DIRECTLY ! */
851 static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e )
852 {
853    MatchInfo mi;
854 
855    IRType ty = typeOfIRExpr(env->type_env,e);
856    vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
857 
858    switch (e->tag) {
859 
860    /* --------- TEMP --------- */
861    case Iex_RdTmp: {
862       return lookupIRTemp(env, e->Iex.RdTmp.tmp);
863    }
864 
865    /* --------- LOAD --------- */
866    case Iex_Load: {
867       HReg dst = newVRegI(env);
868       X86AMode* amode = iselIntExpr_AMode ( env, e->Iex.Load.addr );
869 
870       /* We can't handle big-endian loads, nor load-linked. */
871       if (e->Iex.Load.end != Iend_LE)
872          goto irreducible;
873 
874       if (ty == Ity_I32) {
875          addInstr(env, X86Instr_Alu32R(Xalu_MOV,
876                                        X86RMI_Mem(amode), dst) );
877          return dst;
878       }
879       if (ty == Ity_I16) {
880          addInstr(env, X86Instr_LoadEX(2,False,amode,dst));
881          return dst;
882       }
883       if (ty == Ity_I8) {
884          addInstr(env, X86Instr_LoadEX(1,False,amode,dst));
885          return dst;
886       }
887       break;
888    }
889 
890    /* --------- TERNARY OP --------- */
891    case Iex_Triop: {
892       IRTriop *triop = e->Iex.Triop.details;
893       /* C3210 flags following FPU partial remainder (fprem), both
894          IEEE compliant (PREM1) and non-IEEE compliant (PREM). */
895       if (triop->op == Iop_PRemC3210F64
896           || triop->op == Iop_PRem1C3210F64) {
897          HReg junk = newVRegF(env);
898          HReg dst  = newVRegI(env);
899          HReg srcL = iselDblExpr(env, triop->arg2);
900          HReg srcR = iselDblExpr(env, triop->arg3);
901          /* XXXROUNDINGFIXME */
902          /* set roundingmode here */
903          addInstr(env, X86Instr_FpBinary(
904                            triop->op==Iop_PRemC3210F64
905                               ? Xfp_PREM : Xfp_PREM1,
906                            srcL,srcR,junk
907                  ));
908          /* The previous pseudo-insn will have left the FPU's C3210
909             flags set correctly.  So bag them. */
910          addInstr(env, X86Instr_FpStSW_AX());
911          addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), dst));
912          addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(0x4700), dst));
913          return dst;
914       }
915 
916       break;
917    }
918 
919    /* --------- BINARY OP --------- */
920    case Iex_Binop: {
921       X86AluOp   aluOp;
922       X86ShiftOp shOp;
923 
924       /* Pattern: Sub32(0,x) */
925       if (e->Iex.Binop.op == Iop_Sub32 && isZeroU32(e->Iex.Binop.arg1)) {
926          HReg dst = newVRegI(env);
927          HReg reg = iselIntExpr_R(env, e->Iex.Binop.arg2);
928          addInstr(env, mk_iMOVsd_RR(reg,dst));
929          addInstr(env, X86Instr_Unary32(Xun_NEG,dst));
930          return dst;
931       }
932 
933       /* Is it an addition or logical style op? */
934       switch (e->Iex.Binop.op) {
935          case Iop_Add8: case Iop_Add16: case Iop_Add32:
936             aluOp = Xalu_ADD; break;
937          case Iop_Sub8: case Iop_Sub16: case Iop_Sub32:
938             aluOp = Xalu_SUB; break;
939          case Iop_And8: case Iop_And16: case Iop_And32:
940             aluOp = Xalu_AND; break;
941          case Iop_Or8: case Iop_Or16: case Iop_Or32:
942             aluOp = Xalu_OR; break;
943          case Iop_Xor8: case Iop_Xor16: case Iop_Xor32:
944             aluOp = Xalu_XOR; break;
945          case Iop_Mul16: case Iop_Mul32:
946             aluOp = Xalu_MUL; break;
947          default:
948             aluOp = Xalu_INVALID; break;
949       }
950       /* For commutative ops we assume any literal
951          values are on the second operand. */
952       if (aluOp != Xalu_INVALID) {
953          HReg dst    = newVRegI(env);
954          HReg reg    = iselIntExpr_R(env, e->Iex.Binop.arg1);
955          X86RMI* rmi = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
956          addInstr(env, mk_iMOVsd_RR(reg,dst));
957          addInstr(env, X86Instr_Alu32R(aluOp, rmi, dst));
958          return dst;
959       }
960       /* Could do better here; forcing the first arg into a reg
961          isn't always clever.
962          -- t70 = Xor32(And32(Xor32(LDle:I32(Add32(t41,0xFFFFFFA0:I32)),
963                         LDle:I32(Add32(t41,0xFFFFFFA4:I32))),LDle:I32(Add32(
964                         t41,0xFFFFFFA8:I32))),LDle:I32(Add32(t41,0xFFFFFFA0:I32)))
965             movl 0xFFFFFFA0(%vr41),%vr107
966             movl 0xFFFFFFA4(%vr41),%vr108
967             movl %vr107,%vr106
968             xorl %vr108,%vr106
969             movl 0xFFFFFFA8(%vr41),%vr109
970             movl %vr106,%vr105
971             andl %vr109,%vr105
972             movl 0xFFFFFFA0(%vr41),%vr110
973             movl %vr105,%vr104
974             xorl %vr110,%vr104
975             movl %vr104,%vr70
976       */
977 
978       /* Perhaps a shift op? */
979       switch (e->Iex.Binop.op) {
980          case Iop_Shl32: case Iop_Shl16: case Iop_Shl8:
981             shOp = Xsh_SHL; break;
982          case Iop_Shr32: case Iop_Shr16: case Iop_Shr8:
983             shOp = Xsh_SHR; break;
984          case Iop_Sar32: case Iop_Sar16: case Iop_Sar8:
985             shOp = Xsh_SAR; break;
986          default:
987             shOp = Xsh_INVALID; break;
988       }
989       if (shOp != Xsh_INVALID) {
990          HReg dst = newVRegI(env);
991 
992          /* regL = the value to be shifted */
993          HReg regL   = iselIntExpr_R(env, e->Iex.Binop.arg1);
994          addInstr(env, mk_iMOVsd_RR(regL,dst));
995 
996          /* Do any necessary widening for 16/8 bit operands */
997          switch (e->Iex.Binop.op) {
998             case Iop_Shr8:
999                addInstr(env, X86Instr_Alu32R(
1000                                 Xalu_AND, X86RMI_Imm(0xFF), dst));
1001                break;
1002             case Iop_Shr16:
1003                addInstr(env, X86Instr_Alu32R(
1004                                 Xalu_AND, X86RMI_Imm(0xFFFF), dst));
1005                break;
1006             case Iop_Sar8:
1007                addInstr(env, X86Instr_Sh32(Xsh_SHL, 24, dst));
1008                addInstr(env, X86Instr_Sh32(Xsh_SAR, 24, dst));
1009                break;
1010             case Iop_Sar16:
1011                addInstr(env, X86Instr_Sh32(Xsh_SHL, 16, dst));
1012                addInstr(env, X86Instr_Sh32(Xsh_SAR, 16, dst));
1013                break;
1014             default: break;
1015          }
1016 
1017          /* Now consider the shift amount.  If it's a literal, we
1018             can do a much better job than the general case. */
1019          if (e->Iex.Binop.arg2->tag == Iex_Const) {
1020             /* assert that the IR is well-typed */
1021             Int nshift;
1022             vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
1023             nshift = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
1024 	    vassert(nshift >= 0);
1025 	    if (nshift > 0)
1026                /* Can't allow nshift==0 since that means %cl */
1027                addInstr(env, X86Instr_Sh32( shOp, nshift, dst ));
1028          } else {
1029             /* General case; we have to force the amount into %cl. */
1030             HReg regR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1031             addInstr(env, mk_iMOVsd_RR(regR,hregX86_ECX()));
1032             addInstr(env, X86Instr_Sh32(shOp, 0/* %cl */, dst));
1033          }
1034          return dst;
1035       }
1036 
1037       /* Handle misc other ops. */
1038 
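      /* Max32U: copy arg1 into dst, compare it with arg2, and use a
         conditional move on the 'below' (unsigned <) condition to
         replace dst with arg2, yielding the unsigned maximum. */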
1039       if (e->Iex.Binop.op == Iop_Max32U) {
1040          HReg src1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
1041          HReg dst  = newVRegI(env);
1042          HReg src2 = iselIntExpr_R(env, e->Iex.Binop.arg2);
1043          addInstr(env, mk_iMOVsd_RR(src1,dst));
1044          addInstr(env, X86Instr_Alu32R(Xalu_CMP, X86RMI_Reg(src2), dst));
1045          addInstr(env, X86Instr_CMov32(Xcc_B, X86RM_Reg(src2), dst));
1046          return dst;
1047       }
1048 
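      /* 8HLto16 / 16HLto32: build (hi << N) | (lo & mask), with N/mask
         being 8/0xFF or 16/0xFFFF respectively.  The low half must be
         masked because the upper bits of an 8/16-bit value held in a
         32-bit vreg are arbitrary (see the comment at the start of this
         section). */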
1049       if (e->Iex.Binop.op == Iop_8HLto16) {
1050          HReg hi8  = newVRegI(env);
1051          HReg lo8  = newVRegI(env);
1052          HReg hi8s = iselIntExpr_R(env, e->Iex.Binop.arg1);
1053          HReg lo8s = iselIntExpr_R(env, e->Iex.Binop.arg2);
1054          addInstr(env, mk_iMOVsd_RR(hi8s, hi8));
1055          addInstr(env, mk_iMOVsd_RR(lo8s, lo8));
1056          addInstr(env, X86Instr_Sh32(Xsh_SHL, 8, hi8));
1057          addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(0xFF), lo8));
1058          addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Reg(lo8), hi8));
1059          return hi8;
1060       }
1061 
1062       if (e->Iex.Binop.op == Iop_16HLto32) {
1063          HReg hi16  = newVRegI(env);
1064          HReg lo16  = newVRegI(env);
1065          HReg hi16s = iselIntExpr_R(env, e->Iex.Binop.arg1);
1066          HReg lo16s = iselIntExpr_R(env, e->Iex.Binop.arg2);
1067          addInstr(env, mk_iMOVsd_RR(hi16s, hi16));
1068          addInstr(env, mk_iMOVsd_RR(lo16s, lo16));
1069          addInstr(env, X86Instr_Sh32(Xsh_SHL, 16, hi16));
1070          addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(0xFFFF), lo16));
1071          addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Reg(lo16), hi16));
1072          return hi16;
1073       }
1074 
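      /* Widening 8-/16-bit multiplies.  Each operand is normalised
         first: shift it left so the value sits in the top bits, then
         shift back down arithmetically (signed) or logically (unsigned).
         That leaves correctly sign-/zero-extended 32-bit values, so an
         ordinary 32-bit multiply then yields the full widened product. */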
1075       if (e->Iex.Binop.op == Iop_MullS16 || e->Iex.Binop.op == Iop_MullS8
1076           || e->Iex.Binop.op == Iop_MullU16 || e->Iex.Binop.op == Iop_MullU8) {
1077          HReg a16   = newVRegI(env);
1078          HReg b16   = newVRegI(env);
1079          HReg a16s  = iselIntExpr_R(env, e->Iex.Binop.arg1);
1080          HReg b16s  = iselIntExpr_R(env, e->Iex.Binop.arg2);
1081          Int  shift = (e->Iex.Binop.op == Iop_MullS8
1082                        || e->Iex.Binop.op == Iop_MullU8)
1083                          ? 24 : 16;
1084          X86ShiftOp shr_op = (e->Iex.Binop.op == Iop_MullS8
1085                               || e->Iex.Binop.op == Iop_MullS16)
1086                                 ? Xsh_SAR : Xsh_SHR;
1087 
1088          addInstr(env, mk_iMOVsd_RR(a16s, a16));
1089          addInstr(env, mk_iMOVsd_RR(b16s, b16));
1090          addInstr(env, X86Instr_Sh32(Xsh_SHL, shift, a16));
1091          addInstr(env, X86Instr_Sh32(Xsh_SHL, shift, b16));
1092          addInstr(env, X86Instr_Sh32(shr_op,  shift, a16));
1093          addInstr(env, X86Instr_Sh32(shr_op,  shift, b16));
1094          addInstr(env, X86Instr_Alu32R(Xalu_MUL, X86RMI_Reg(a16), b16));
1095          return b16;
1096       }
1097 
1098       if (e->Iex.Binop.op == Iop_CmpF64) {
1099          HReg fL = iselDblExpr(env, e->Iex.Binop.arg1);
1100          HReg fR = iselDblExpr(env, e->Iex.Binop.arg2);
1101          HReg dst = newVRegI(env);
1102          addInstr(env, X86Instr_FpCmp(fL,fR,dst));
1103          /* shift this right 8 bits so as to conform to CmpF64
1104             definition. */
1105          addInstr(env, X86Instr_Sh32(Xsh_SHR, 8, dst));
1106          return dst;
1107       }
1108 
1109       if (e->Iex.Binop.op == Iop_F64toI32S
1110           || e->Iex.Binop.op == Iop_F64toI16S) {
1111          Int  sz  = e->Iex.Binop.op == Iop_F64toI16S ? 2 : 4;
1112          HReg rf  = iselDblExpr(env, e->Iex.Binop.arg2);
1113          HReg dst = newVRegI(env);
1114 
1115          /* Used several times ... */
1116          X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
1117 
1118          /* rf now holds the value to be converted, and arg1 is the
1119             rounding mode, encoded as per the IRRoundingMode enum.
1120             The first thing to do is set the FPU's rounding mode
1121             accordingly. */
1122 
1123          /* Create a space for the format conversion. */
1124          /* subl $4, %esp */
1125          sub_from_esp(env, 4);
1126 
1127 	 /* Set host rounding mode */
1128 	 set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
1129 
1130          /* gistw/l %rf, 0(%esp) */
1131          addInstr(env, X86Instr_FpLdStI(False/*store*/,
1132                                         toUChar(sz), rf, zero_esp));
1133 
1134          if (sz == 2) {
1135             /* movzwl 0(%esp), %dst */
1136             addInstr(env, X86Instr_LoadEX(2,False,zero_esp,dst));
1137          } else {
1138             /* movl 0(%esp), %dst */
1139             vassert(sz == 4);
1140             addInstr(env, X86Instr_Alu32R(
1141                              Xalu_MOV, X86RMI_Mem(zero_esp), dst));
1142          }
1143 
1144 	 /* Restore default FPU rounding. */
1145          set_FPU_rounding_default( env );
1146 
1147          /* addl $4, %esp */
1148 	 add_to_esp(env, 4);
1149          return dst;
1150       }
1151 
1152       break;
1153    }
1154 
1155    /* --------- UNARY OP --------- */
1156    case Iex_Unop: {
1157 
1158       /* 1Uto8(32to1(expr32)) */
1159       if (e->Iex.Unop.op == Iop_1Uto8) {
1160          DECLARE_PATTERN(p_32to1_then_1Uto8);
1161          DEFINE_PATTERN(p_32to1_then_1Uto8,
1162                         unop(Iop_1Uto8,unop(Iop_32to1,bind(0))));
1163          if (matchIRExpr(&mi,p_32to1_then_1Uto8,e)) {
1164             IRExpr* expr32 = mi.bindee[0];
1165             HReg dst = newVRegI(env);
1166             HReg src = iselIntExpr_R(env, expr32);
1167             addInstr(env, mk_iMOVsd_RR(src,dst) );
1168             addInstr(env, X86Instr_Alu32R(Xalu_AND,
1169                                           X86RMI_Imm(1), dst));
1170             return dst;
1171          }
1172       }
1173 
1174       /* 8Uto32(LDle(expr32)) */
1175       if (e->Iex.Unop.op == Iop_8Uto32) {
1176          DECLARE_PATTERN(p_LDle8_then_8Uto32);
1177          DEFINE_PATTERN(p_LDle8_then_8Uto32,
1178                         unop(Iop_8Uto32,
1179                              IRExpr_Load(Iend_LE,Ity_I8,bind(0))) );
1180          if (matchIRExpr(&mi,p_LDle8_then_8Uto32,e)) {
1181             HReg dst = newVRegI(env);
1182             X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
1183             addInstr(env, X86Instr_LoadEX(1,False,amode,dst));
1184             return dst;
1185          }
1186       }
1187 
1188       /* 8Sto32(LDle(expr32)) */
1189       if (e->Iex.Unop.op == Iop_8Sto32) {
1190          DECLARE_PATTERN(p_LDle8_then_8Sto32);
1191          DEFINE_PATTERN(p_LDle8_then_8Sto32,
1192                         unop(Iop_8Sto32,
1193                              IRExpr_Load(Iend_LE,Ity_I8,bind(0))) );
1194          if (matchIRExpr(&mi,p_LDle8_then_8Sto32,e)) {
1195             HReg dst = newVRegI(env);
1196             X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
1197             addInstr(env, X86Instr_LoadEX(1,True,amode,dst));
1198             return dst;
1199          }
1200       }
1201 
1202       /* 16Uto32(LDle(expr32)) */
1203       if (e->Iex.Unop.op == Iop_16Uto32) {
1204          DECLARE_PATTERN(p_LDle16_then_16Uto32);
1205          DEFINE_PATTERN(p_LDle16_then_16Uto32,
1206                         unop(Iop_16Uto32,
1207                              IRExpr_Load(Iend_LE,Ity_I16,bind(0))) );
1208          if (matchIRExpr(&mi,p_LDle16_then_16Uto32,e)) {
1209             HReg dst = newVRegI(env);
1210             X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
1211             addInstr(env, X86Instr_LoadEX(2,False,amode,dst));
1212             return dst;
1213          }
1214       }
1215 
1216       /* 8Uto32(GET:I8) */
1217       if (e->Iex.Unop.op == Iop_8Uto32) {
1218          if (e->Iex.Unop.arg->tag == Iex_Get) {
1219             HReg      dst;
1220             X86AMode* amode;
1221             vassert(e->Iex.Unop.arg->Iex.Get.ty == Ity_I8);
1222             dst = newVRegI(env);
1223             amode = X86AMode_IR(e->Iex.Unop.arg->Iex.Get.offset,
1224                                 hregX86_EBP());
1225             addInstr(env, X86Instr_LoadEX(1,False,amode,dst));
1226             return dst;
1227          }
1228       }
1229 
1230       /* 16Uto32(GET:I16) */
1231       if (e->Iex.Unop.op == Iop_16Uto32) {
1232          if (e->Iex.Unop.arg->tag == Iex_Get) {
1233             HReg      dst;
1234             X86AMode* amode;
1235             vassert(e->Iex.Unop.arg->Iex.Get.ty == Ity_I16);
1236             dst = newVRegI(env);
1237             amode = X86AMode_IR(e->Iex.Unop.arg->Iex.Get.offset,
1238                                 hregX86_EBP());
1239             addInstr(env, X86Instr_LoadEX(2,False,amode,dst));
1240             return dst;
1241          }
1242       }
1243 
1244       switch (e->Iex.Unop.op) {
1245          case Iop_8Uto16:
1246          case Iop_8Uto32:
1247          case Iop_16Uto32: {
1248             HReg dst = newVRegI(env);
1249             HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1250             UInt mask = e->Iex.Unop.op==Iop_16Uto32 ? 0xFFFF : 0xFF;
1251             addInstr(env, mk_iMOVsd_RR(src,dst) );
1252             addInstr(env, X86Instr_Alu32R(Xalu_AND,
1253                                           X86RMI_Imm(mask), dst));
1254             return dst;
1255          }
1256          case Iop_8Sto16:
1257          case Iop_8Sto32:
1258          case Iop_16Sto32: {
1259             HReg dst = newVRegI(env);
1260             HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1261             UInt amt = e->Iex.Unop.op==Iop_16Sto32 ? 16 : 24;
1262             addInstr(env, mk_iMOVsd_RR(src,dst) );
1263             addInstr(env, X86Instr_Sh32(Xsh_SHL, amt, dst));
1264             addInstr(env, X86Instr_Sh32(Xsh_SAR, amt, dst));
1265             return dst;
1266          }
1267 	 case Iop_Not8:
1268 	 case Iop_Not16:
1269          case Iop_Not32: {
1270             HReg dst = newVRegI(env);
1271             HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1272             addInstr(env, mk_iMOVsd_RR(src,dst) );
1273             addInstr(env, X86Instr_Unary32(Xun_NOT,dst));
1274             return dst;
1275          }
1276          case Iop_64HIto32: {
1277             HReg rHi, rLo;
1278             iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
1279             return rHi; /* and abandon rLo .. poor wee thing :-) */
1280          }
1281          case Iop_64to32: {
1282             HReg rHi, rLo;
1283             iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
1284             return rLo; /* similar stupid comment to the above ... */
1285          }
1286          case Iop_16HIto8:
1287          case Iop_32HIto16: {
1288             HReg dst  = newVRegI(env);
1289             HReg src  = iselIntExpr_R(env, e->Iex.Unop.arg);
1290             Int shift = e->Iex.Unop.op == Iop_16HIto8 ? 8 : 16;
1291             addInstr(env, mk_iMOVsd_RR(src,dst) );
1292             addInstr(env, X86Instr_Sh32(Xsh_SHR, shift, dst));
1293             return dst;
1294          }
1295          case Iop_1Uto32:
1296          case Iop_1Uto8: {
1297             HReg dst         = newVRegI(env);
1298             X86CondCode cond = iselCondCode(env, e->Iex.Unop.arg);
1299             addInstr(env, X86Instr_Set32(cond,dst));
1300             return dst;
1301          }
1302          case Iop_1Sto8:
1303          case Iop_1Sto16:
1304          case Iop_1Sto32: {
1305             /* could do better than this, but for now ... */
1306             HReg dst         = newVRegI(env);
1307             X86CondCode cond = iselCondCode(env, e->Iex.Unop.arg);
1308             addInstr(env, X86Instr_Set32(cond,dst));
1309             addInstr(env, X86Instr_Sh32(Xsh_SHL, 31, dst));
1310             addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, dst));
1311             return dst;
1312          }
1313          case Iop_Ctz32: {
1314             /* Count trailing zeroes, implemented by x86 'bsfl' */
1315             HReg dst = newVRegI(env);
1316             HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1317             addInstr(env, X86Instr_Bsfr32(True,src,dst));
1318             return dst;
1319          }
1320          case Iop_Clz32: {
1321             /* Count leading zeroes.  Do 'bsrl' to establish the index
1322                of the highest set bit, and subtract that value from
1323                31. */
1324             HReg tmp = newVRegI(env);
1325             HReg dst = newVRegI(env);
1326             HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1327             addInstr(env, X86Instr_Bsfr32(False,src,tmp));
1328             addInstr(env, X86Instr_Alu32R(Xalu_MOV,
1329                                           X86RMI_Imm(31), dst));
1330             addInstr(env, X86Instr_Alu32R(Xalu_SUB,
1331                                           X86RMI_Reg(tmp), dst));
1332             return dst;
1333          }
1334 
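         /* CmpwNEZ32: compute (src | -src) and arithmetically shift it
            right by 31, giving all zeroes if src == 0 and all ones
            otherwise. */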
1335          case Iop_CmpwNEZ32: {
1336             HReg dst = newVRegI(env);
1337             HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1338             addInstr(env, mk_iMOVsd_RR(src,dst));
1339             addInstr(env, X86Instr_Unary32(Xun_NEG,dst));
1340             addInstr(env, X86Instr_Alu32R(Xalu_OR,
1341                                           X86RMI_Reg(src), dst));
1342             addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, dst));
1343             return dst;
1344          }
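         /* Left8/16/32 compute x | -x (the IR's definition of LeftN):
            negate a copy of the source, then OR the original back in. */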
1345          case Iop_Left8:
1346          case Iop_Left16:
1347          case Iop_Left32: {
1348             HReg dst = newVRegI(env);
1349             HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1350             addInstr(env, mk_iMOVsd_RR(src, dst));
1351             addInstr(env, X86Instr_Unary32(Xun_NEG, dst));
1352             addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Reg(src), dst));
1353             return dst;
1354          }
1355 
1356          case Iop_V128to32: {
1357             HReg      dst  = newVRegI(env);
1358             HReg      vec  = iselVecExpr(env, e->Iex.Unop.arg);
1359             X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
1360             sub_from_esp(env, 16);
1361             addInstr(env, X86Instr_SseLdSt(False/*store*/, vec, esp0));
1362             addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(esp0), dst ));
1363             add_to_esp(env, 16);
1364             return dst;
1365          }
1366 
1367          /* ReinterpF32asI32(e) */
1368          /* Given an IEEE754 single, produce an I32 with the same bit
1369             pattern.  Keep stack 8-aligned even though only using 4
1370             bytes. */
1371          case Iop_ReinterpF32asI32: {
1372             HReg rf   = iselFltExpr(env, e->Iex.Unop.arg);
1373             HReg dst  = newVRegI(env);
1374             X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
1375             /* paranoia */
1376             set_FPU_rounding_default(env);
1377             /* subl $8, %esp */
1378             sub_from_esp(env, 8);
1379             /* gstF %rf, 0(%esp) */
1380             addInstr(env,
1381                      X86Instr_FpLdSt(False/*store*/, 4, rf, zero_esp));
1382             /* movl 0(%esp), %dst */
1383             addInstr(env,
1384                      X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(zero_esp), dst));
1385             /* addl $8, %esp */
1386             add_to_esp(env, 8);
1387             return dst;
1388          }
1389 
1390          case Iop_16to8:
1391          case Iop_32to8:
1392          case Iop_32to16:
1393             /* These are no-ops. */
1394             return iselIntExpr_R(env, e->Iex.Unop.arg);
1395 
1396          case Iop_GetMSBs8x8: {
1397             /* Note: the following assumes the helper is of
1398                signature
1399                   UInt fn ( ULong ), and is not a regparm fn.
1400             */
1401             HReg  xLo, xHi;
1402             HReg  dst = newVRegI(env);
1403             Addr fn = (Addr)h_generic_calc_GetMSBs8x8;
1404             iselInt64Expr(&xHi, &xLo, env, e->Iex.Unop.arg);
1405             addInstr(env, X86Instr_Push(X86RMI_Reg(xHi)));
1406             addInstr(env, X86Instr_Push(X86RMI_Reg(xLo)));
1407             addInstr(env, X86Instr_Call( Xcc_ALWAYS, (Addr32)fn,
1408                                          0, mk_RetLoc_simple(RLPri_Int) ));
1409             add_to_esp(env, 2*4);
1410             addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), dst));
1411             return dst;
1412          }
1413 
1414          default:
1415             break;
1416       }
1417       break;
1418    }
1419 
1420    /* --------- GET --------- */
1421    case Iex_Get: {
1422       if (ty == Ity_I32) {
1423          HReg dst = newVRegI(env);
1424          addInstr(env, X86Instr_Alu32R(
1425                           Xalu_MOV,
1426                           X86RMI_Mem(X86AMode_IR(e->Iex.Get.offset,
1427                                                  hregX86_EBP())),
1428                           dst));
1429          return dst;
1430       }
1431       if (ty == Ity_I8 || ty == Ity_I16) {
1432          HReg dst = newVRegI(env);
1433          addInstr(env, X86Instr_LoadEX(
1434                           toUChar(ty==Ity_I8 ? 1 : 2),
1435                           False,
1436                           X86AMode_IR(e->Iex.Get.offset,hregX86_EBP()),
1437                           dst));
1438          return dst;
1439       }
1440       break;
1441    }
1442 
1443    case Iex_GetI: {
1444       X86AMode* am
1445          = genGuestArrayOffset(
1446               env, e->Iex.GetI.descr,
1447                    e->Iex.GetI.ix, e->Iex.GetI.bias );
1448       HReg dst = newVRegI(env);
1449       if (ty == Ity_I8) {
1450          addInstr(env, X86Instr_LoadEX( 1, False, am, dst ));
1451          return dst;
1452       }
1453       if (ty == Ity_I32) {
1454          addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(am), dst));
1455          return dst;
1456       }
1457       break;
1458    }
1459 
1460    /* --------- CCALL --------- */
1461    case Iex_CCall: {
1462       HReg    dst = newVRegI(env);
1463       vassert(ty == e->Iex.CCall.retty);
1464 
1465       /* be very restrictive for now.  Only 32/64-bit ints allowed for
1466          args, and 32 bits for return type.  Don't forget to change
1467          the RetLoc if more return types are allowed in future. */
1468       if (e->Iex.CCall.retty != Ity_I32)
1469          goto irreducible;
1470 
1471       /* Marshal args, do the call, clear stack. */
1472       UInt   addToSp = 0;
1473       RetLoc rloc    = mk_RetLoc_INVALID();
1474       doHelperCall( &addToSp, &rloc, env, NULL/*guard*/,
1475                     e->Iex.CCall.cee, e->Iex.CCall.retty, e->Iex.CCall.args );
1476       vassert(is_sane_RetLoc(rloc));
1477       vassert(rloc.pri == RLPri_Int);
1478       vassert(addToSp == 0);
1479 
1480       addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), dst));
1481       return dst;
1482    }
1483 
1484    /* --------- LITERAL --------- */
1485    /* 32/16/8-bit literals */
1486    case Iex_Const: {
1487       X86RMI* rmi = iselIntExpr_RMI ( env, e );
1488       HReg    r   = newVRegI(env);
1489       addInstr(env, X86Instr_Alu32R(Xalu_MOV, rmi, r));
1490       return r;
1491    }
1492 
1493    /* --------- MULTIPLEX --------- */
1494    case Iex_ITE: { // VFD
1495      if ((ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8)
1496          && typeOfIRExpr(env->type_env,e->Iex.ITE.cond) == Ity_I1) {
1497         HReg   r1  = iselIntExpr_R(env, e->Iex.ITE.iftrue);
1498         X86RM* r0  = iselIntExpr_RM(env, e->Iex.ITE.iffalse);
1499         HReg   dst = newVRegI(env);
1500         addInstr(env, mk_iMOVsd_RR(r1,dst));
1501         X86CondCode cc = iselCondCode(env, e->Iex.ITE.cond);
1502         addInstr(env, X86Instr_CMov32(cc ^ 1, r0, dst));
1503         return dst;
1504       }
1505       break;
1506    }
1507 
1508    default:
1509       break;
1510    } /* switch (e->tag) */
1511 
1512    /* We get here if no pattern matched. */
1513   irreducible:
1514    ppIRExpr(e);
1515    vpanic("iselIntExpr_R: cannot reduce tree");
1516 }
1517 
1518 
1519 /*---------------------------------------------------------*/
1520 /*--- ISEL: Integer expression auxiliaries              ---*/
1521 /*---------------------------------------------------------*/
1522 
1523 /* --------------------- AMODEs --------------------- */
1524 
1525 /* Return an AMode which computes the value of the specified
1526    expression, possibly also adding insns to the code list as a
1527    result.  The expression may only be a 32-bit one.
1528 */
1529 
1530 static Bool sane_AMode ( X86AMode* am )
1531 {
1532    switch (am->tag) {
1533       case Xam_IR:
1534          return
1535             toBool( hregClass(am->Xam.IR.reg) == HRcInt32
1536                     && (hregIsVirtual(am->Xam.IR.reg)
1537                         || sameHReg(am->Xam.IR.reg, hregX86_EBP())) );
1538       case Xam_IRRS:
1539          return
1540             toBool( hregClass(am->Xam.IRRS.base) == HRcInt32
1541                     && hregIsVirtual(am->Xam.IRRS.base)
1542                     && hregClass(am->Xam.IRRS.index) == HRcInt32
1543                     && hregIsVirtual(am->Xam.IRRS.index) );
1544       default:
1545         vpanic("sane_AMode: unknown x86 amode tag");
1546    }
1547 }
1548 
1549 static X86AMode* iselIntExpr_AMode ( ISelEnv* env, IRExpr* e )
1550 {
1551    X86AMode* am = iselIntExpr_AMode_wrk(env, e);
1552    vassert(sane_AMode(am));
1553    return am;
1554 }
1555 
1556 /* DO NOT CALL THIS DIRECTLY ! */
1557 static X86AMode* iselIntExpr_AMode_wrk ( ISelEnv* env, IRExpr* e )
1558 {
1559    IRType ty = typeOfIRExpr(env->type_env,e);
1560    vassert(ty == Ity_I32);
1561 
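        /* In outline: X86AMode_IR(d, rb) denotes "d(%rb)" and
           X86AMode_IRRS(d, rb, ri, s) denotes "d(%rb,%ri,1<<s)", so a
           Shl32 by 1, 2 or 3 below turns into a scale of 2, 4 or 8.
           The patterns are tried most-specific first. */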
1562    /* Add32( Add32(expr1, Shl32(expr2, simm)), imm32 ) */
1563    if (e->tag == Iex_Binop
1564        && e->Iex.Binop.op == Iop_Add32
1565        && e->Iex.Binop.arg2->tag == Iex_Const
1566        && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U32
1567        && e->Iex.Binop.arg1->tag == Iex_Binop
1568        && e->Iex.Binop.arg1->Iex.Binop.op == Iop_Add32
1569        && e->Iex.Binop.arg1->Iex.Binop.arg2->tag == Iex_Binop
1570        && e->Iex.Binop.arg1->Iex.Binop.arg2->Iex.Binop.op == Iop_Shl32
1571        && e->Iex.Binop.arg1
1572            ->Iex.Binop.arg2->Iex.Binop.arg2->tag == Iex_Const
1573        && e->Iex.Binop.arg1
1574            ->Iex.Binop.arg2->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8) {
1575       UInt shift = e->Iex.Binop.arg1
1576                     ->Iex.Binop.arg2->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
1577       UInt imm32 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
1578       if (shift == 1 || shift == 2 || shift == 3) {
1579          HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1->Iex.Binop.arg1);
1580          HReg r2 = iselIntExpr_R(env, e->Iex.Binop.arg1
1581                                        ->Iex.Binop.arg2->Iex.Binop.arg1 );
1582          return X86AMode_IRRS(imm32, r1, r2, shift);
1583       }
1584    }
1585 
1586    /* Add32(expr1, Shl32(expr2, imm)) */
1587    if (e->tag == Iex_Binop
1588        && e->Iex.Binop.op == Iop_Add32
1589        && e->Iex.Binop.arg2->tag == Iex_Binop
1590        && e->Iex.Binop.arg2->Iex.Binop.op == Iop_Shl32
1591        && e->Iex.Binop.arg2->Iex.Binop.arg2->tag == Iex_Const
1592        && e->Iex.Binop.arg2->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8) {
1593       UInt shift = e->Iex.Binop.arg2->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
1594       if (shift == 1 || shift == 2 || shift == 3) {
1595          HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
1596          HReg r2 = iselIntExpr_R(env, e->Iex.Binop.arg2->Iex.Binop.arg1 );
1597          return X86AMode_IRRS(0, r1, r2, shift);
1598       }
1599    }
1600 
1601    /* Add32(expr,i) */
1602    if (e->tag == Iex_Binop
1603        && e->Iex.Binop.op == Iop_Add32
1604        && e->Iex.Binop.arg2->tag == Iex_Const
1605        && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U32) {
1606       HReg r1 = iselIntExpr_R(env,  e->Iex.Binop.arg1);
1607       return X86AMode_IR(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32, r1);
1608    }
1609 
1610    /* Doesn't match anything in particular.  Generate it into
1611       a register and use that. */
1612    {
1613       HReg r1 = iselIntExpr_R(env, e);
1614       return X86AMode_IR(0, r1);
1615    }
1616 }
1617 
1618 
1619 /* --------------------- RMIs --------------------- */
1620 
1621 /* Similarly, calculate an expression into an X86RMI operand.  As with
1622    iselIntExpr_R, the expression can have type 32, 16 or 8 bits.  */
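
     /* For orientation: an X86RMI is the general "source operand" form
        for the 32-bit ALU instructions -- a register (Xrmi_Reg), a memory
        reference (Xrmi_Mem) or an immediate (Xrmi_Imm).  The _wrk routine
        below picks an immediate or memory form where the expression allows
        it, and otherwise falls back to a register. */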
1623 
1624 static X86RMI* iselIntExpr_RMI ( ISelEnv* env, IRExpr* e )
1625 {
1626    X86RMI* rmi = iselIntExpr_RMI_wrk(env, e);
1627    /* sanity checks ... */
1628    switch (rmi->tag) {
1629       case Xrmi_Imm:
1630          return rmi;
1631       case Xrmi_Reg:
1632          vassert(hregClass(rmi->Xrmi.Reg.reg) == HRcInt32);
1633          vassert(hregIsVirtual(rmi->Xrmi.Reg.reg));
1634          return rmi;
1635       case Xrmi_Mem:
1636          vassert(sane_AMode(rmi->Xrmi.Mem.am));
1637          return rmi;
1638       default:
1639          vpanic("iselIntExpr_RMI: unknown x86 RMI tag");
1640    }
1641 }
1642 
1643 /* DO NOT CALL THIS DIRECTLY ! */
1644 static X86RMI* iselIntExpr_RMI_wrk ( ISelEnv* env, IRExpr* e )
1645 {
1646    IRType ty = typeOfIRExpr(env->type_env,e);
1647    vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
1648 
1649    /* special case: immediate */
1650    if (e->tag == Iex_Const) {
1651       UInt u;
1652       switch (e->Iex.Const.con->tag) {
1653          case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
1654          case Ico_U16: u = 0xFFFF & (e->Iex.Const.con->Ico.U16); break;
1655          case Ico_U8:  u = 0xFF   & (e->Iex.Const.con->Ico.U8); break;
1656          default: vpanic("iselIntExpr_RMI.Iex_Const(x86h)");
1657       }
1658       return X86RMI_Imm(u);
1659    }
1660 
1661    /* special case: 32-bit GET */
1662    if (e->tag == Iex_Get && ty == Ity_I32) {
1663       return X86RMI_Mem(X86AMode_IR(e->Iex.Get.offset,
1664                                     hregX86_EBP()));
1665    }
1666 
1667    /* special case: 32-bit load from memory */
1668    if (e->tag == Iex_Load && ty == Ity_I32
1669        && e->Iex.Load.end == Iend_LE) {
1670       X86AMode* am = iselIntExpr_AMode(env, e->Iex.Load.addr);
1671       return X86RMI_Mem(am);
1672    }
1673 
1674    /* default case: calculate into a register and return that */
1675    {
1676       HReg r = iselIntExpr_R ( env, e );
1677       return X86RMI_Reg(r);
1678    }
1679 }
1680 
1681 
1682 /* --------------------- RIs --------------------- */
1683 
1684 /* Calculate an expression into an X86RI operand.  As with
1685    iselIntExpr_R, the expression can have type 32, 16 or 8 bits. */
1686 
1687 static X86RI* iselIntExpr_RI ( ISelEnv* env, IRExpr* e )
1688 {
1689    X86RI* ri = iselIntExpr_RI_wrk(env, e);
1690    /* sanity checks ... */
1691    switch (ri->tag) {
1692       case Xri_Imm:
1693          return ri;
1694       case Xri_Reg:
1695          vassert(hregClass(ri->Xri.Reg.reg) == HRcInt32);
1696          vassert(hregIsVirtual(ri->Xri.Reg.reg));
1697          return ri;
1698       default:
1699          vpanic("iselIntExpr_RI: unknown x86 RI tag");
1700    }
1701 }
1702 
1703 /* DO NOT CALL THIS DIRECTLY ! */
1704 static X86RI* iselIntExpr_RI_wrk ( ISelEnv* env, IRExpr* e )
1705 {
1706    IRType ty = typeOfIRExpr(env->type_env,e);
1707    vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
1708 
1709    /* special case: immediate */
1710    if (e->tag == Iex_Const) {
1711       UInt u;
1712       switch (e->Iex.Const.con->tag) {
1713          case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
1714          case Ico_U16: u = 0xFFFF & (e->Iex.Const.con->Ico.U16); break;
1715          case Ico_U8:  u = 0xFF   & (e->Iex.Const.con->Ico.U8); break;
1716          default: vpanic("iselIntExpr_RI.Iex_Const(x86h)");
1717       }
1718       return X86RI_Imm(u);
1719    }
1720 
1721    /* default case: calculate into a register and return that */
1722    {
1723       HReg r = iselIntExpr_R ( env, e );
1724       return X86RI_Reg(r);
1725    }
1726 }
1727 
1728 
1729 /* --------------------- RMs --------------------- */
1730 
1731 /* Similarly, calculate an expression into an X86RM operand.  As with
1732    iselIntExpr_R, the expression can have type 32, 16 or 8 bits.  */
1733 
1734 static X86RM* iselIntExpr_RM ( ISelEnv* env, IRExpr* e )
1735 {
1736    X86RM* rm = iselIntExpr_RM_wrk(env, e);
1737    /* sanity checks ... */
1738    switch (rm->tag) {
1739       case Xrm_Reg:
1740          vassert(hregClass(rm->Xrm.Reg.reg) == HRcInt32);
1741          vassert(hregIsVirtual(rm->Xrm.Reg.reg));
1742          return rm;
1743       case Xrm_Mem:
1744          vassert(sane_AMode(rm->Xrm.Mem.am));
1745          return rm;
1746       default:
1747          vpanic("iselIntExpr_RM: unknown x86 RM tag");
1748    }
1749 }
1750 
1751 /* DO NOT CALL THIS DIRECTLY ! */
1752 static X86RM* iselIntExpr_RM_wrk ( ISelEnv* env, IRExpr* e )
1753 {
1754    IRType ty = typeOfIRExpr(env->type_env,e);
1755    vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
1756 
1757    /* special case: 32-bit GET */
1758    if (e->tag == Iex_Get && ty == Ity_I32) {
1759       return X86RM_Mem(X86AMode_IR(e->Iex.Get.offset,
1760                                    hregX86_EBP()));
1761    }
1762 
1763    /* special case: load from memory -- not handled; fall through to the default case */
1764 
1765    /* default case: calculate into a register and return that */
1766    {
1767       HReg r = iselIntExpr_R ( env, e );
1768       return X86RM_Reg(r);
1769    }
1770 }
1771 
1772 
1773 /* --------------------- CONDCODE --------------------- */
1774 
1775 /* Generate code to evaluate a bit-typed expression, returning the
1776    condition code which would correspond to the expression having
1777    notionally evaluated to 1. */
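
     /* For example (a sketch): CmpLT32S(x,y) is handled below by emitting
        "cmpl y, x" and returning Xcc_L; the caller then uses the returned
        condition in a setcc, jcc or cmov as required. */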
1778 
1779 static X86CondCode iselCondCode ( ISelEnv* env, IRExpr* e )
1780 {
1781    /* Uh, there's nothing we can sanity check here, unfortunately. */
1782    return iselCondCode_wrk(env,e);
1783 }
1784 
1785 /* DO NOT CALL THIS DIRECTLY ! */
1786 static X86CondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e )
1787 {
1788    MatchInfo mi;
1789 
1790    vassert(e);
1791    vassert(typeOfIRExpr(env->type_env,e) == Ity_I1);
1792 
1793    /* var */
1794    if (e->tag == Iex_RdTmp) {
1795       HReg r32 = lookupIRTemp(env, e->Iex.RdTmp.tmp);
1796       /* Test32 doesn't modify r32; so this is OK. */
1797       addInstr(env, X86Instr_Test32(1,X86RM_Reg(r32)));
1798       return Xcc_NZ;
1799    }
1800 
1801    /* Constant 1:Bit */
1802    if (e->tag == Iex_Const) {
1803       HReg r;
1804       vassert(e->Iex.Const.con->tag == Ico_U1);
1805       vassert(e->Iex.Const.con->Ico.U1 == True
1806               || e->Iex.Const.con->Ico.U1 == False);
1807       r = newVRegI(env);
1808       addInstr(env, X86Instr_Alu32R(Xalu_MOV,X86RMI_Imm(0),r));
1809       addInstr(env, X86Instr_Alu32R(Xalu_XOR,X86RMI_Reg(r),r));
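           /* The XOR leaves the Z flag set, so at this point Xcc_Z is an
              always-true condition and Xcc_NZ an always-false one. */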
1810       return e->Iex.Const.con->Ico.U1 ? Xcc_Z : Xcc_NZ;
1811    }
1812 
1813    /* Not1(e) */
1814    if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_Not1) {
1815       /* Generate code for the arg, and negate the test condition */
1816       return 1 ^ iselCondCode(env, e->Iex.Unop.arg);
1817    }
1818 
1819    /* --- patterns rooted at: 32to1 --- */
1820 
1821    if (e->tag == Iex_Unop
1822        && e->Iex.Unop.op == Iop_32to1) {
1823       X86RM* rm = iselIntExpr_RM(env, e->Iex.Unop.arg);
1824       addInstr(env, X86Instr_Test32(1,rm));
1825       return Xcc_NZ;
1826    }
1827 
1828    /* --- patterns rooted at: CmpNEZ8 --- */
1829 
1830    /* CmpNEZ8(x) */
1831    if (e->tag == Iex_Unop
1832        && e->Iex.Unop.op == Iop_CmpNEZ8) {
1833       X86RM* rm = iselIntExpr_RM(env, e->Iex.Unop.arg);
1834       addInstr(env, X86Instr_Test32(0xFF,rm));
1835       return Xcc_NZ;
1836    }
1837 
1838    /* --- patterns rooted at: CmpNEZ16 --- */
1839 
1840    /* CmpNEZ16(x) */
1841    if (e->tag == Iex_Unop
1842        && e->Iex.Unop.op == Iop_CmpNEZ16) {
1843       X86RM* rm = iselIntExpr_RM(env, e->Iex.Unop.arg);
1844       addInstr(env, X86Instr_Test32(0xFFFF,rm));
1845       return Xcc_NZ;
1846    }
1847 
1848    /* --- patterns rooted at: CmpNEZ32 --- */
1849 
1850    /* CmpNEZ32(And32(x,y)) */
1851    {
1852       DECLARE_PATTERN(p_CmpNEZ32_And32);
1853       DEFINE_PATTERN(p_CmpNEZ32_And32,
1854                      unop(Iop_CmpNEZ32, binop(Iop_And32, bind(0), bind(1))));
1855       if (matchIRExpr(&mi, p_CmpNEZ32_And32, e)) {
1856          HReg    r0   = iselIntExpr_R(env, mi.bindee[0]);
1857          X86RMI* rmi1 = iselIntExpr_RMI(env, mi.bindee[1]);
1858          HReg    tmp  = newVRegI(env);
1859          addInstr(env, mk_iMOVsd_RR(r0, tmp));
1860          addInstr(env, X86Instr_Alu32R(Xalu_AND,rmi1,tmp));
1861          return Xcc_NZ;
1862       }
1863    }
1864 
1865    /* CmpNEZ32(Or32(x,y)) */
1866    {
1867       DECLARE_PATTERN(p_CmpNEZ32_Or32);
1868       DEFINE_PATTERN(p_CmpNEZ32_Or32,
1869                      unop(Iop_CmpNEZ32, binop(Iop_Or32, bind(0), bind(1))));
1870       if (matchIRExpr(&mi, p_CmpNEZ32_Or32, e)) {
1871          HReg    r0   = iselIntExpr_R(env, mi.bindee[0]);
1872          X86RMI* rmi1 = iselIntExpr_RMI(env, mi.bindee[1]);
1873          HReg    tmp  = newVRegI(env);
1874          addInstr(env, mk_iMOVsd_RR(r0, tmp));
1875          addInstr(env, X86Instr_Alu32R(Xalu_OR,rmi1,tmp));
1876          return Xcc_NZ;
1877       }
1878    }
1879 
1880    /* CmpNEZ32(GET(..):I32) */
1881    if (e->tag == Iex_Unop
1882        && e->Iex.Unop.op == Iop_CmpNEZ32
1883        && e->Iex.Unop.arg->tag == Iex_Get) {
1884       X86AMode* am = X86AMode_IR(e->Iex.Unop.arg->Iex.Get.offset,
1885                                  hregX86_EBP());
1886       addInstr(env, X86Instr_Alu32M(Xalu_CMP, X86RI_Imm(0), am));
1887       return Xcc_NZ;
1888    }
1889 
1890    /* CmpNEZ32(x) */
1891    if (e->tag == Iex_Unop
1892        && e->Iex.Unop.op == Iop_CmpNEZ32) {
1893       HReg    r1   = iselIntExpr_R(env, e->Iex.Unop.arg);
1894       X86RMI* rmi2 = X86RMI_Imm(0);
1895       addInstr(env, X86Instr_Alu32R(Xalu_CMP,rmi2,r1));
1896       return Xcc_NZ;
1897    }
1898 
1899    /* --- patterns rooted at: CmpNEZ64 --- */
1900 
1901    /* CmpNEZ64(Or64(x,y)) */
1902    {
1903       DECLARE_PATTERN(p_CmpNEZ64_Or64);
1904       DEFINE_PATTERN(p_CmpNEZ64_Or64,
1905                      unop(Iop_CmpNEZ64, binop(Iop_Or64, bind(0), bind(1))));
1906       if (matchIRExpr(&mi, p_CmpNEZ64_Or64, e)) {
1907          HReg    hi1, lo1, hi2, lo2;
1908          HReg    tmp  = newVRegI(env);
1909          iselInt64Expr( &hi1, &lo1, env, mi.bindee[0] );
1910          addInstr(env, mk_iMOVsd_RR(hi1, tmp));
1911          addInstr(env, X86Instr_Alu32R(Xalu_OR,X86RMI_Reg(lo1),tmp));
1912          iselInt64Expr( &hi2, &lo2, env, mi.bindee[1] );
1913          addInstr(env, X86Instr_Alu32R(Xalu_OR,X86RMI_Reg(hi2),tmp));
1914          addInstr(env, X86Instr_Alu32R(Xalu_OR,X86RMI_Reg(lo2),tmp));
1915          return Xcc_NZ;
1916       }
1917    }
1918 
1919    /* CmpNEZ64(x) */
1920    if (e->tag == Iex_Unop
1921        && e->Iex.Unop.op == Iop_CmpNEZ64) {
1922       HReg hi, lo;
1923       HReg tmp = newVRegI(env);
1924       iselInt64Expr( &hi, &lo, env, e->Iex.Unop.arg );
1925       addInstr(env, mk_iMOVsd_RR(hi, tmp));
1926       addInstr(env, X86Instr_Alu32R(Xalu_OR,X86RMI_Reg(lo), tmp));
1927       return Xcc_NZ;
1928    }
1929 
1930    /* --- patterns rooted at: Cmp{EQ,NE}{8,16} --- */
1931 
1932    /* CmpEQ8 / CmpNE8 */
1933    if (e->tag == Iex_Binop
1934        && (e->Iex.Binop.op == Iop_CmpEQ8
1935            || e->Iex.Binop.op == Iop_CmpNE8
1936            || e->Iex.Binop.op == Iop_CasCmpEQ8
1937            || e->Iex.Binop.op == Iop_CasCmpNE8)) {
1938       if (isZeroU8(e->Iex.Binop.arg2)) {
1939          HReg    r1   = iselIntExpr_R(env, e->Iex.Binop.arg1);
1940          addInstr(env, X86Instr_Test32(0xFF,X86RM_Reg(r1)));
1941          switch (e->Iex.Binop.op) {
1942             case Iop_CmpEQ8: case Iop_CasCmpEQ8: return Xcc_Z;
1943             case Iop_CmpNE8: case Iop_CasCmpNE8: return Xcc_NZ;
1944             default: vpanic("iselCondCode(x86): CmpXX8(expr,0:I8)");
1945          }
1946       } else {
1947          HReg    r1   = iselIntExpr_R(env, e->Iex.Binop.arg1);
1948          X86RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
1949          HReg    r    = newVRegI(env);
1950          addInstr(env, mk_iMOVsd_RR(r1,r));
1951          addInstr(env, X86Instr_Alu32R(Xalu_XOR,rmi2,r));
1952          addInstr(env, X86Instr_Test32(0xFF,X86RM_Reg(r)));
1953          switch (e->Iex.Binop.op) {
1954             case Iop_CmpEQ8: case Iop_CasCmpEQ8: return Xcc_Z;
1955             case Iop_CmpNE8: case Iop_CasCmpNE8: return Xcc_NZ;
1956             default: vpanic("iselCondCode(x86): CmpXX8(expr,expr)");
1957          }
1958       }
1959    }
1960 
1961    /* CmpEQ16 / CmpNE16 */
1962    if (e->tag == Iex_Binop
1963        && (e->Iex.Binop.op == Iop_CmpEQ16
1964            || e->Iex.Binop.op == Iop_CmpNE16
1965            || e->Iex.Binop.op == Iop_CasCmpEQ16
1966            || e->Iex.Binop.op == Iop_CasCmpNE16
1967            || e->Iex.Binop.op == Iop_ExpCmpNE16)) {
1968       HReg    r1   = iselIntExpr_R(env, e->Iex.Binop.arg1);
1969       X86RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
1970       HReg    r    = newVRegI(env);
1971       addInstr(env, mk_iMOVsd_RR(r1,r));
1972       addInstr(env, X86Instr_Alu32R(Xalu_XOR,rmi2,r));
1973       addInstr(env, X86Instr_Test32(0xFFFF,X86RM_Reg(r)));
1974       switch (e->Iex.Binop.op) {
1975          case Iop_CmpEQ16: case Iop_CasCmpEQ16:
1976             return Xcc_Z;
1977          case Iop_CmpNE16: case Iop_CasCmpNE16: case Iop_ExpCmpNE16:
1978             return Xcc_NZ;
1979          default:
1980             vpanic("iselCondCode(x86): CmpXX16");
1981       }
1982    }
1983 
1984    /* CmpNE32(ccall, 32-bit constant) (--smc-check=all optimisation).
1985       Saves a "movl %eax, %tmp" compared to the default route. */
1986    if (e->tag == Iex_Binop
1987        && e->Iex.Binop.op == Iop_CmpNE32
1988        && e->Iex.Binop.arg1->tag == Iex_CCall
1989        && e->Iex.Binop.arg2->tag == Iex_Const) {
1990       IRExpr* cal = e->Iex.Binop.arg1;
1991       IRExpr* con = e->Iex.Binop.arg2;
1992       /* clone & partial-eval of generic Iex_CCall and Iex_Const cases */
1993       vassert(cal->Iex.CCall.retty == Ity_I32); /* else ill-typed IR */
1994       vassert(con->Iex.Const.con->tag == Ico_U32);
1995       /* Marshal args, do the call. */
1996       UInt   addToSp = 0;
1997       RetLoc rloc    = mk_RetLoc_INVALID();
1998       doHelperCall( &addToSp, &rloc, env, NULL/*guard*/,
1999                     cal->Iex.CCall.cee,
2000                     cal->Iex.CCall.retty, cal->Iex.CCall.args );
2001       vassert(is_sane_RetLoc(rloc));
2002       vassert(rloc.pri == RLPri_Int);
2003       vassert(addToSp == 0);
2004       /* */
2005       addInstr(env, X86Instr_Alu32R(Xalu_CMP,
2006                                     X86RMI_Imm(con->Iex.Const.con->Ico.U32),
2007                                     hregX86_EAX()));
2008       return Xcc_NZ;
2009    }
2010 
2011    /* Cmp*32*(x,y) */
2012    if (e->tag == Iex_Binop
2013        && (e->Iex.Binop.op == Iop_CmpEQ32
2014            || e->Iex.Binop.op == Iop_CmpNE32
2015            || e->Iex.Binop.op == Iop_CmpLT32S
2016            || e->Iex.Binop.op == Iop_CmpLT32U
2017            || e->Iex.Binop.op == Iop_CmpLE32S
2018            || e->Iex.Binop.op == Iop_CmpLE32U
2019            || e->Iex.Binop.op == Iop_CasCmpEQ32
2020            || e->Iex.Binop.op == Iop_CasCmpNE32
2021            || e->Iex.Binop.op == Iop_ExpCmpNE32)) {
2022       HReg    r1   = iselIntExpr_R(env, e->Iex.Binop.arg1);
2023       X86RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
2024       addInstr(env, X86Instr_Alu32R(Xalu_CMP,rmi2,r1));
2025       switch (e->Iex.Binop.op) {
2026          case Iop_CmpEQ32: case Iop_CasCmpEQ32: return Xcc_Z;
2027          case Iop_CmpNE32:
2028          case Iop_CasCmpNE32: case Iop_ExpCmpNE32: return Xcc_NZ;
2029          case Iop_CmpLT32S: return Xcc_L;
2030          case Iop_CmpLT32U: return Xcc_B;
2031          case Iop_CmpLE32S: return Xcc_LE;
2032          case Iop_CmpLE32U: return Xcc_BE;
2033          default: vpanic("iselCondCode(x86): CmpXX32");
2034       }
2035    }
2036 
2037    /* CmpNE64 */
2038    if (e->tag == Iex_Binop
2039        && (e->Iex.Binop.op == Iop_CmpNE64
2040            || e->Iex.Binop.op == Iop_CmpEQ64)) {
2041       HReg hi1, hi2, lo1, lo2;
2042       HReg tHi = newVRegI(env);
2043       HReg tLo = newVRegI(env);
2044       iselInt64Expr( &hi1, &lo1, env, e->Iex.Binop.arg1 );
2045       iselInt64Expr( &hi2, &lo2, env, e->Iex.Binop.arg2 );
2046       addInstr(env, mk_iMOVsd_RR(hi1, tHi));
2047       addInstr(env, X86Instr_Alu32R(Xalu_XOR,X86RMI_Reg(hi2), tHi));
2048       addInstr(env, mk_iMOVsd_RR(lo1, tLo));
2049       addInstr(env, X86Instr_Alu32R(Xalu_XOR,X86RMI_Reg(lo2), tLo));
2050       addInstr(env, X86Instr_Alu32R(Xalu_OR,X86RMI_Reg(tHi), tLo));
2051       switch (e->Iex.Binop.op) {
2052          case Iop_CmpNE64: return Xcc_NZ;
2053          case Iop_CmpEQ64: return Xcc_Z;
2054          default: vpanic("iselCondCode(x86): CmpXX64");
2055       }
2056    }
2057 
2058    ppIRExpr(e);
2059    vpanic("iselCondCode");
2060 }
2061 
2062 
2063 /*---------------------------------------------------------*/
2064 /*--- ISEL: Integer expressions (64 bit)                ---*/
2065 /*---------------------------------------------------------*/
2066 
2067 /* Compute a 64-bit value into a register pair, which is returned as
2068    the first two parameters.  As with iselIntExpr_R, these may be
2069    either real or virtual regs; in any case they must not be changed
2070    by subsequent code emitted by the caller.  */
2071 
2072 static void iselInt64Expr ( HReg* rHi, HReg* rLo, ISelEnv* env, IRExpr* e )
2073 {
2074    iselInt64Expr_wrk(rHi, rLo, env, e);
2075 #  if 0
2076    vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
2077 #  endif
2078    vassert(hregClass(*rHi) == HRcInt32);
2079    vassert(hregIsVirtual(*rHi));
2080    vassert(hregClass(*rLo) == HRcInt32);
2081    vassert(hregIsVirtual(*rLo));
2082 }
2083 
2084 /* DO NOT CALL THIS DIRECTLY ! */
2085 static void iselInt64Expr_wrk ( HReg* rHi, HReg* rLo, ISelEnv* env, IRExpr* e )
2086 {
2087    MatchInfo mi;
2088    HWord fn = 0; /* helper fn for most SIMD64 stuff */
2089    vassert(e);
2090    vassert(typeOfIRExpr(env->type_env,e) == Ity_I64);
2091 
2092    /* 64-bit literal */
2093    if (e->tag == Iex_Const) {
2094       ULong w64 = e->Iex.Const.con->Ico.U64;
2095       UInt  wHi = toUInt(w64 >> 32);
2096       UInt  wLo = toUInt(w64);
2097       HReg  tLo = newVRegI(env);
2098       HReg  tHi = newVRegI(env);
2099       vassert(e->Iex.Const.con->tag == Ico_U64);
2100       if (wLo == wHi) {
2101          /* Save a precious Int register in this special case. */
2102          addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(wLo), tLo));
2103          *rHi = tLo;
2104          *rLo = tLo;
2105       } else {
2106          addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(wHi), tHi));
2107          addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(wLo), tLo));
2108          *rHi = tHi;
2109          *rLo = tLo;
2110       }
2111       return;
2112    }
2113 
2114    /* read 64-bit IRTemp */
2115    if (e->tag == Iex_RdTmp) {
2116       lookupIRTemp64( rHi, rLo, env, e->Iex.RdTmp.tmp);
2117       return;
2118    }
2119 
2120    /* 64-bit load */
2121    if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
2122       HReg     tLo, tHi;
2123       X86AMode *am0, *am4;
2124       vassert(e->Iex.Load.ty == Ity_I64);
2125       tLo = newVRegI(env);
2126       tHi = newVRegI(env);
2127       am0 = iselIntExpr_AMode(env, e->Iex.Load.addr);
2128       am4 = advance4(am0);
2129       addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am0), tLo ));
2130       addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am4), tHi ));
2131       *rHi = tHi;
2132       *rLo = tLo;
2133       return;
2134    }
2135 
2136    /* 64-bit GET */
2137    if (e->tag == Iex_Get) {
2138       X86AMode* am  = X86AMode_IR(e->Iex.Get.offset, hregX86_EBP());
2139       X86AMode* am4 = advance4(am);
2140       HReg tLo = newVRegI(env);
2141       HReg tHi = newVRegI(env);
2142       addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am), tLo ));
2143       addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am4), tHi ));
2144       *rHi = tHi;
2145       *rLo = tLo;
2146       return;
2147    }
2148 
2149    /* 64-bit GETI */
2150    if (e->tag == Iex_GetI) {
2151       X86AMode* am
2152          = genGuestArrayOffset( env, e->Iex.GetI.descr,
2153                                      e->Iex.GetI.ix, e->Iex.GetI.bias );
2154       X86AMode* am4 = advance4(am);
2155       HReg tLo = newVRegI(env);
2156       HReg tHi = newVRegI(env);
2157       addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am), tLo ));
2158       addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am4), tHi ));
2159       *rHi = tHi;
2160       *rLo = tLo;
2161       return;
2162    }
2163 
2164    /* 64-bit ITE: ITE(g, expr, expr) */ // VFD
2165    if (e->tag == Iex_ITE) {
2166       HReg e0Lo, e0Hi, e1Lo, e1Hi;
2167       HReg tLo = newVRegI(env);
2168       HReg tHi = newVRegI(env);
2169       iselInt64Expr(&e0Hi, &e0Lo, env, e->Iex.ITE.iffalse);
2170       iselInt64Expr(&e1Hi, &e1Lo, env, e->Iex.ITE.iftrue);
2171       addInstr(env, mk_iMOVsd_RR(e1Hi, tHi));
2172       addInstr(env, mk_iMOVsd_RR(e1Lo, tLo));
2173       X86CondCode cc = iselCondCode(env, e->Iex.ITE.cond);
2174       /* This assumes the first cmov32 doesn't trash the condition
2175          codes, so they are still available for the second cmov32 */
2176       addInstr(env, X86Instr_CMov32(cc ^ 1, X86RM_Reg(e0Hi), tHi));
2177       addInstr(env, X86Instr_CMov32(cc ^ 1, X86RM_Reg(e0Lo), tLo));
2178       *rHi = tHi;
2179       *rLo = tLo;
2180       return;
2181    }
2182 
2183    /* --------- BINARY ops --------- */
2184    if (e->tag == Iex_Binop) {
2185       switch (e->Iex.Binop.op) {
2186          /* 32 x 32 -> 64 multiply */
2187          case Iop_MullU32:
2188          case Iop_MullS32: {
2189             /* Get one operand into %eax, and the other into an R/M.
2190                Need to make an educated guess about which operand is
2191                better placed where. */
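                 /* Background (x86): one-operand mull/imull takes %eax as
                    the implicit multiplicand and writes the 64-bit product
                    to %edx:%eax, which is why one operand is forced into
                    %eax below. */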
2192             HReg   tLo    = newVRegI(env);
2193             HReg   tHi    = newVRegI(env);
2194             Bool   syned  = toBool(e->Iex.Binop.op == Iop_MullS32);
2195             X86RM* rmLeft = iselIntExpr_RM(env, e->Iex.Binop.arg1);
2196             HReg   rRight = iselIntExpr_R(env, e->Iex.Binop.arg2);
2197             addInstr(env, mk_iMOVsd_RR(rRight, hregX86_EAX()));
2198             addInstr(env, X86Instr_MulL(syned, rmLeft));
2199             /* Result is now in EDX:EAX.  Tell the caller. */
2200             addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi));
2201             addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo));
2202             *rHi = tHi;
2203             *rLo = tLo;
2204             return;
2205          }
2206 
2207          /* 64 x 32 -> (32(rem),32(div)) division */
2208          case Iop_DivModU64to32:
2209          case Iop_DivModS64to32: {
2210             /* Get the 64-bit operand into edx:eax, and the other into
2211                any old R/M. */
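                 /* Background (x86): divl/idivl take the 64-bit dividend
                    in %edx:%eax and leave the quotient in %eax and the
                    remainder in %edx, matching the (rem,div) pair this op
                    returns. */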
2212             HReg sHi, sLo;
2213             HReg   tLo     = newVRegI(env);
2214             HReg   tHi     = newVRegI(env);
2215             Bool   syned   = toBool(e->Iex.Binop.op == Iop_DivModS64to32);
2216             X86RM* rmRight = iselIntExpr_RM(env, e->Iex.Binop.arg2);
2217             iselInt64Expr(&sHi,&sLo, env, e->Iex.Binop.arg1);
2218             addInstr(env, mk_iMOVsd_RR(sHi, hregX86_EDX()));
2219             addInstr(env, mk_iMOVsd_RR(sLo, hregX86_EAX()));
2220             addInstr(env, X86Instr_Div(syned, rmRight));
2221             addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi));
2222             addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo));
2223             *rHi = tHi;
2224             *rLo = tLo;
2225             return;
2226          }
2227 
2228          /* Or64/And64/Xor64 */
2229          case Iop_Or64:
2230          case Iop_And64:
2231          case Iop_Xor64: {
2232             HReg xLo, xHi, yLo, yHi;
2233             HReg tLo = newVRegI(env);
2234             HReg tHi = newVRegI(env);
2235             X86AluOp op = e->Iex.Binop.op==Iop_Or64 ? Xalu_OR
2236                           : e->Iex.Binop.op==Iop_And64 ? Xalu_AND
2237                           : Xalu_XOR;
2238             iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
2239             iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2);
2240             addInstr(env, mk_iMOVsd_RR(xHi, tHi));
2241             addInstr(env, X86Instr_Alu32R(op, X86RMI_Reg(yHi), tHi));
2242             addInstr(env, mk_iMOVsd_RR(xLo, tLo));
2243             addInstr(env, X86Instr_Alu32R(op, X86RMI_Reg(yLo), tLo));
2244             *rHi = tHi;
2245             *rLo = tLo;
2246             return;
2247          }
2248 
2249          /* Add64/Sub64 */
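              /* Both are handled as an op on the low halves followed by a
                 carry-consuming op on the high halves (ADD/ADC, SUB/SBB),
                 so the carry or borrow propagates across the full 64 bits. */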
2250          case Iop_Add64:
2251             if (e->Iex.Binop.arg2->tag == Iex_Const) {
2252                /* special case Add64(e, const) */
2253                ULong w64 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U64;
2254                UInt  wHi = toUInt(w64 >> 32);
2255                UInt  wLo = toUInt(w64);
2256                HReg  tLo = newVRegI(env);
2257                HReg  tHi = newVRegI(env);
2258                HReg  xLo, xHi;
2259                vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U64);
2260                iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
2261                addInstr(env, mk_iMOVsd_RR(xHi, tHi));
2262                addInstr(env, mk_iMOVsd_RR(xLo, tLo));
2263                addInstr(env, X86Instr_Alu32R(Xalu_ADD, X86RMI_Imm(wLo), tLo));
2264                addInstr(env, X86Instr_Alu32R(Xalu_ADC, X86RMI_Imm(wHi), tHi));
2265                *rHi = tHi;
2266                *rLo = tLo;
2267                return;
2268             }
2269             /* else fall through to the generic case */
2270          case Iop_Sub64: {
2271             HReg xLo, xHi, yLo, yHi;
2272             HReg tLo = newVRegI(env);
2273             HReg tHi = newVRegI(env);
2274             iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
2275             addInstr(env, mk_iMOVsd_RR(xHi, tHi));
2276             addInstr(env, mk_iMOVsd_RR(xLo, tLo));
2277             iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2);
2278             if (e->Iex.Binop.op==Iop_Add64) {
2279                addInstr(env, X86Instr_Alu32R(Xalu_ADD, X86RMI_Reg(yLo), tLo));
2280                addInstr(env, X86Instr_Alu32R(Xalu_ADC, X86RMI_Reg(yHi), tHi));
2281             } else {
2282                addInstr(env, X86Instr_Alu32R(Xalu_SUB, X86RMI_Reg(yLo), tLo));
2283                addInstr(env, X86Instr_Alu32R(Xalu_SBB, X86RMI_Reg(yHi), tHi));
2284             }
2285             *rHi = tHi;
2286             *rLo = tLo;
2287             return;
2288          }
2289 
2290          /* 32HLto64(e1,e2) */
2291          case Iop_32HLto64:
2292             *rHi = iselIntExpr_R(env, e->Iex.Binop.arg1);
2293             *rLo = iselIntExpr_R(env, e->Iex.Binop.arg2);
2294             return;
2295 
2296          /* 64-bit shifts */
2297          case Iop_Shl64: {
2298             /* We use the same ingenious scheme as gcc.  Put the value
2299                to be shifted into %hi:%lo, and the shift amount into
2300                %cl.  Then (dsts on right, a la ATT syntax):
2301 
2302                shldl %cl, %lo, %hi   -- make %hi be right for the
2303                                      -- shift amt %cl % 32
2304                shll  %cl, %lo        -- make %lo be right for the
2305                                      -- shift amt %cl % 32
2306 
2307                Now, if (shift amount % 64) is in the range 32 .. 63,
2308                we have to do a fixup, which puts the result low half
2309                into the result high half, and zeroes the low half:
2310 
2311                testl $32, %ecx
2312 
2313                cmovnz %lo, %hi
2314                movl $0, %tmp         -- sigh; need yet another reg
2315                cmovnz %tmp, %lo
2316             */
2317             HReg rAmt, sHi, sLo, tHi, tLo, tTemp;
2318             tLo = newVRegI(env);
2319             tHi = newVRegI(env);
2320             tTemp = newVRegI(env);
2321             rAmt = iselIntExpr_R(env, e->Iex.Binop.arg2);
2322             iselInt64Expr(&sHi,&sLo, env, e->Iex.Binop.arg1);
2323             addInstr(env, mk_iMOVsd_RR(rAmt, hregX86_ECX()));
2324             addInstr(env, mk_iMOVsd_RR(sHi, tHi));
2325             addInstr(env, mk_iMOVsd_RR(sLo, tLo));
2326             /* Ok.  Now shift amt is in %ecx, and value is in tHi/tLo
2327                and those regs are legitimately modifiable. */
2328             addInstr(env, X86Instr_Sh3232(Xsh_SHL, 0/*%cl*/, tLo, tHi));
2329             addInstr(env, X86Instr_Sh32(Xsh_SHL, 0/*%cl*/, tLo));
2330             addInstr(env, X86Instr_Test32(32, X86RM_Reg(hregX86_ECX())));
2331             addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(tLo), tHi));
2332             addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tTemp));
2333             addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(tTemp), tLo));
2334             *rHi = tHi;
2335             *rLo = tLo;
2336             return;
2337          }
2338 
2339          case Iop_Shr64: {
2340             /* We use the same ingenious scheme as gcc.  Put the value
2341                to be shifted into %hi:%lo, and the shift amount into
2342                %cl.  Then:
2343 
2344                shrdl %cl, %hi, %lo   -- make %lo be right for the
2345                                      -- shift amt %cl % 32
2346                shrl  %cl, %hi        -- make %hi be right for the
2347                                      -- shift amt %cl % 32
2348 
2349                Now, if (shift amount % 64) is in the range 32 .. 63,
2350                we have to do a fixup, which puts the result high half
2351                into the result low half, and zeroes the high half:
2352 
2353                testl $32, %ecx
2354 
2355                cmovnz %hi, %lo
2356                movl $0, %tmp         -- sigh; need yet another reg
2357                cmovnz %tmp, %hi
2358             */
2359             HReg rAmt, sHi, sLo, tHi, tLo, tTemp;
2360             tLo = newVRegI(env);
2361             tHi = newVRegI(env);
2362             tTemp = newVRegI(env);
2363             rAmt = iselIntExpr_R(env, e->Iex.Binop.arg2);
2364             iselInt64Expr(&sHi,&sLo, env, e->Iex.Binop.arg1);
2365             addInstr(env, mk_iMOVsd_RR(rAmt, hregX86_ECX()));
2366             addInstr(env, mk_iMOVsd_RR(sHi, tHi));
2367             addInstr(env, mk_iMOVsd_RR(sLo, tLo));
2368             /* Ok.  Now shift amt is in %ecx, and value is in tHi/tLo
2369                and those regs are legitimately modifiable. */
2370             addInstr(env, X86Instr_Sh3232(Xsh_SHR, 0/*%cl*/, tHi, tLo));
2371             addInstr(env, X86Instr_Sh32(Xsh_SHR, 0/*%cl*/, tHi));
2372             addInstr(env, X86Instr_Test32(32, X86RM_Reg(hregX86_ECX())));
2373             addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(tHi), tLo));
2374             addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tTemp));
2375             addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(tTemp), tHi));
2376             *rHi = tHi;
2377             *rLo = tLo;
2378             return;
2379          }
2380 
2381          /* F64 -> I64 */
2382          /* Sigh, this is an almost exact copy of the F64 -> I32/I16
2383             case.  Unfortunately I see no easy way to avoid the
2384             duplication. */
2385          case Iop_F64toI64S: {
2386             HReg rf  = iselDblExpr(env, e->Iex.Binop.arg2);
2387             HReg tLo = newVRegI(env);
2388             HReg tHi = newVRegI(env);
2389 
2390             /* Used several times ... */
2391             /* Careful ... this sharing is only safe because
2392                zero_esp/four_esp do not hold any registers which the
2393                register allocator could attempt to swizzle later. */
2394             X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
2395             X86AMode* four_esp = X86AMode_IR(4, hregX86_ESP());
2396 
2397             /* rf now holds the value to be converted, and
2398                e->Iex.Binop.arg1 gives the rounding mode, encoded as per
2399                the IRRoundingMode enum.  The first thing to do is set
2400                the FPU's rounding mode accordingly. */
2401 
2402             /* Create a space for the format conversion. */
2403             /* subl $8, %esp */
2404             sub_from_esp(env, 8);
2405 
2406             /* Set host rounding mode */
2407             set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
2408 
2409             /* gistll %rf, 0(%esp) */
2410             addInstr(env, X86Instr_FpLdStI(False/*store*/, 8, rf, zero_esp));
2411 
2412             /* movl 0(%esp), %dstLo */
2413             /* movl 4(%esp), %dstHi */
2414             addInstr(env, X86Instr_Alu32R(
2415                              Xalu_MOV, X86RMI_Mem(zero_esp), tLo));
2416             addInstr(env, X86Instr_Alu32R(
2417                              Xalu_MOV, X86RMI_Mem(four_esp), tHi));
2418 
2419             /* Restore default FPU rounding. */
2420             set_FPU_rounding_default( env );
2421 
2422             /* addl $8, %esp */
2423             add_to_esp(env, 8);
2424 
2425             *rHi = tHi;
2426             *rLo = tLo;
2427             return;
2428          }
2429 
2430          case Iop_Add8x8:
2431             fn = (HWord)h_generic_calc_Add8x8; goto binnish;
2432          case Iop_Add16x4:
2433             fn = (HWord)h_generic_calc_Add16x4; goto binnish;
2434          case Iop_Add32x2:
2435             fn = (HWord)h_generic_calc_Add32x2; goto binnish;
2436 
2437          case Iop_Avg8Ux8:
2438             fn = (HWord)h_generic_calc_Avg8Ux8; goto binnish;
2439          case Iop_Avg16Ux4:
2440             fn = (HWord)h_generic_calc_Avg16Ux4; goto binnish;
2441 
2442          case Iop_CmpEQ8x8:
2443             fn = (HWord)h_generic_calc_CmpEQ8x8; goto binnish;
2444          case Iop_CmpEQ16x4:
2445             fn = (HWord)h_generic_calc_CmpEQ16x4; goto binnish;
2446          case Iop_CmpEQ32x2:
2447             fn = (HWord)h_generic_calc_CmpEQ32x2; goto binnish;
2448 
2449          case Iop_CmpGT8Sx8:
2450             fn = (HWord)h_generic_calc_CmpGT8Sx8; goto binnish;
2451          case Iop_CmpGT16Sx4:
2452             fn = (HWord)h_generic_calc_CmpGT16Sx4; goto binnish;
2453          case Iop_CmpGT32Sx2:
2454             fn = (HWord)h_generic_calc_CmpGT32Sx2; goto binnish;
2455 
2456          case Iop_InterleaveHI8x8:
2457             fn = (HWord)h_generic_calc_InterleaveHI8x8; goto binnish;
2458          case Iop_InterleaveLO8x8:
2459             fn = (HWord)h_generic_calc_InterleaveLO8x8; goto binnish;
2460          case Iop_InterleaveHI16x4:
2461             fn = (HWord)h_generic_calc_InterleaveHI16x4; goto binnish;
2462          case Iop_InterleaveLO16x4:
2463             fn = (HWord)h_generic_calc_InterleaveLO16x4; goto binnish;
2464          case Iop_InterleaveHI32x2:
2465             fn = (HWord)h_generic_calc_InterleaveHI32x2; goto binnish;
2466          case Iop_InterleaveLO32x2:
2467             fn = (HWord)h_generic_calc_InterleaveLO32x2; goto binnish;
2468          case Iop_CatOddLanes16x4:
2469             fn = (HWord)h_generic_calc_CatOddLanes16x4; goto binnish;
2470          case Iop_CatEvenLanes16x4:
2471             fn = (HWord)h_generic_calc_CatEvenLanes16x4; goto binnish;
2472          case Iop_Perm8x8:
2473             fn = (HWord)h_generic_calc_Perm8x8; goto binnish;
2474 
2475          case Iop_Max8Ux8:
2476             fn = (HWord)h_generic_calc_Max8Ux8; goto binnish;
2477          case Iop_Max16Sx4:
2478             fn = (HWord)h_generic_calc_Max16Sx4; goto binnish;
2479          case Iop_Min8Ux8:
2480             fn = (HWord)h_generic_calc_Min8Ux8; goto binnish;
2481          case Iop_Min16Sx4:
2482             fn = (HWord)h_generic_calc_Min16Sx4; goto binnish;
2483 
2484          case Iop_Mul16x4:
2485             fn = (HWord)h_generic_calc_Mul16x4; goto binnish;
2486          case Iop_Mul32x2:
2487             fn = (HWord)h_generic_calc_Mul32x2; goto binnish;
2488          case Iop_MulHi16Sx4:
2489             fn = (HWord)h_generic_calc_MulHi16Sx4; goto binnish;
2490          case Iop_MulHi16Ux4:
2491             fn = (HWord)h_generic_calc_MulHi16Ux4; goto binnish;
2492 
2493          case Iop_QAdd8Sx8:
2494             fn = (HWord)h_generic_calc_QAdd8Sx8; goto binnish;
2495          case Iop_QAdd16Sx4:
2496             fn = (HWord)h_generic_calc_QAdd16Sx4; goto binnish;
2497          case Iop_QAdd8Ux8:
2498             fn = (HWord)h_generic_calc_QAdd8Ux8; goto binnish;
2499          case Iop_QAdd16Ux4:
2500             fn = (HWord)h_generic_calc_QAdd16Ux4; goto binnish;
2501 
2502          case Iop_QNarrowBin32Sto16Sx4:
2503             fn = (HWord)h_generic_calc_QNarrowBin32Sto16Sx4; goto binnish;
2504          case Iop_QNarrowBin16Sto8Sx8:
2505             fn = (HWord)h_generic_calc_QNarrowBin16Sto8Sx8; goto binnish;
2506          case Iop_QNarrowBin16Sto8Ux8:
2507             fn = (HWord)h_generic_calc_QNarrowBin16Sto8Ux8; goto binnish;
2508          case Iop_NarrowBin16to8x8:
2509             fn = (HWord)h_generic_calc_NarrowBin16to8x8; goto binnish;
2510          case Iop_NarrowBin32to16x4:
2511             fn = (HWord)h_generic_calc_NarrowBin32to16x4; goto binnish;
2512 
2513          case Iop_QSub8Sx8:
2514             fn = (HWord)h_generic_calc_QSub8Sx8; goto binnish;
2515          case Iop_QSub16Sx4:
2516             fn = (HWord)h_generic_calc_QSub16Sx4; goto binnish;
2517          case Iop_QSub8Ux8:
2518             fn = (HWord)h_generic_calc_QSub8Ux8; goto binnish;
2519          case Iop_QSub16Ux4:
2520             fn = (HWord)h_generic_calc_QSub16Ux4; goto binnish;
2521 
2522          case Iop_Sub8x8:
2523             fn = (HWord)h_generic_calc_Sub8x8; goto binnish;
2524          case Iop_Sub16x4:
2525             fn = (HWord)h_generic_calc_Sub16x4; goto binnish;
2526          case Iop_Sub32x2:
2527             fn = (HWord)h_generic_calc_Sub32x2; goto binnish;
2528 
2529          binnish: {
2530             /* Note: the following assumes all helpers are of
2531                signature
2532                   ULong fn ( ULong, ULong ), and they are
2533                not marked as regparm functions.
2534             */
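                 /* Rough sketch of the calling sequence: each 64-bit arg is
                    pushed as two 32-bit words (hi word first, so the low
                    word ends up at the lower address), the helper is
                    called, the 16 bytes of arguments are popped, and the
                    64-bit result is collected from %edx:%eax. */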
2535             HReg xLo, xHi, yLo, yHi;
2536             HReg tLo = newVRegI(env);
2537             HReg tHi = newVRegI(env);
2538             iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2);
2539             addInstr(env, X86Instr_Push(X86RMI_Reg(yHi)));
2540             addInstr(env, X86Instr_Push(X86RMI_Reg(yLo)));
2541             iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
2542             addInstr(env, X86Instr_Push(X86RMI_Reg(xHi)));
2543             addInstr(env, X86Instr_Push(X86RMI_Reg(xLo)));
2544             addInstr(env, X86Instr_Call( Xcc_ALWAYS, (Addr32)fn,
2545                                          0, mk_RetLoc_simple(RLPri_2Int) ));
2546             add_to_esp(env, 4*4);
2547             addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi));
2548             addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo));
2549             *rHi = tHi;
2550             *rLo = tLo;
2551             return;
2552          }
2553 
2554          case Iop_ShlN32x2:
2555             fn = (HWord)h_generic_calc_ShlN32x2; goto shifty;
2556          case Iop_ShlN16x4:
2557             fn = (HWord)h_generic_calc_ShlN16x4; goto shifty;
2558          case Iop_ShlN8x8:
2559             fn = (HWord)h_generic_calc_ShlN8x8;  goto shifty;
2560          case Iop_ShrN32x2:
2561             fn = (HWord)h_generic_calc_ShrN32x2; goto shifty;
2562          case Iop_ShrN16x4:
2563             fn = (HWord)h_generic_calc_ShrN16x4; goto shifty;
2564          case Iop_SarN32x2:
2565             fn = (HWord)h_generic_calc_SarN32x2; goto shifty;
2566          case Iop_SarN16x4:
2567             fn = (HWord)h_generic_calc_SarN16x4; goto shifty;
2568          case Iop_SarN8x8:
2569             fn = (HWord)h_generic_calc_SarN8x8;  goto shifty;
2570          shifty: {
2571             /* Note: the following assumes all helpers are of
2572                signature
2573                   ULong fn ( ULong, UInt ), and they are
2574                not marked as regparm functions.
2575             */
2576             HReg xLo, xHi;
2577             HReg tLo = newVRegI(env);
2578             HReg tHi = newVRegI(env);
2579             X86RMI* y = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
2580             addInstr(env, X86Instr_Push(y));
2581             iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
2582             addInstr(env, X86Instr_Push(X86RMI_Reg(xHi)));
2583             addInstr(env, X86Instr_Push(X86RMI_Reg(xLo)));
2584             addInstr(env, X86Instr_Call( Xcc_ALWAYS, (Addr32)fn,
2585                                          0, mk_RetLoc_simple(RLPri_2Int) ));
2586             add_to_esp(env, 3*4);
2587             addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi));
2588             addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo));
2589             *rHi = tHi;
2590             *rLo = tLo;
2591             return;
2592          }
2593 
2594          default:
2595             break;
2596       }
2597    } /* if (e->tag == Iex_Binop) */
2598 
2599 
2600    /* --------- UNARY ops --------- */
2601    if (e->tag == Iex_Unop) {
2602       switch (e->Iex.Unop.op) {
2603 
2604          /* 32Sto64(e) */
2605          case Iop_32Sto64: {
2606             HReg tLo = newVRegI(env);
2607             HReg tHi = newVRegI(env);
2608             HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
2609             addInstr(env, mk_iMOVsd_RR(src,tHi));
2610             addInstr(env, mk_iMOVsd_RR(src,tLo));
2611             addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, tHi));
2612             *rHi = tHi;
2613             *rLo = tLo;
2614             return;
2615          }
2616 
2617          /* 32Uto64(e) */
2618          case Iop_32Uto64: {
2619             HReg tLo = newVRegI(env);
2620             HReg tHi = newVRegI(env);
2621             HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
2622             addInstr(env, mk_iMOVsd_RR(src,tLo));
2623             addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tHi));
2624             *rHi = tHi;
2625             *rLo = tLo;
2626             return;
2627          }
2628 
2629          /* 16Uto64(e) */
2630          case Iop_16Uto64: {
2631             HReg tLo = newVRegI(env);
2632             HReg tHi = newVRegI(env);
2633             HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
2634             addInstr(env, mk_iMOVsd_RR(src,tLo));
2635             addInstr(env, X86Instr_Alu32R(Xalu_AND,
2636                                           X86RMI_Imm(0xFFFF), tLo));
2637             addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tHi));
2638             *rHi = tHi;
2639             *rLo = tLo;
2640             return;
2641          }
2642 
2643          /* V128{HI}to64 */
2644          case Iop_V128HIto64:
2645          case Iop_V128to64: {
2646             Int  off = e->Iex.Unop.op==Iop_V128HIto64 ? 8 : 0;
2647             HReg tLo = newVRegI(env);
2648             HReg tHi = newVRegI(env);
2649             HReg vec = iselVecExpr(env, e->Iex.Unop.arg);
2650             X86AMode* esp0  = X86AMode_IR(0,     hregX86_ESP());
2651             X86AMode* espLO = X86AMode_IR(off,   hregX86_ESP());
2652             X86AMode* espHI = X86AMode_IR(off+4, hregX86_ESP());
2653             sub_from_esp(env, 16);
2654             addInstr(env, X86Instr_SseLdSt(False/*store*/, vec, esp0));
2655             addInstr(env, X86Instr_Alu32R( Xalu_MOV,
2656                                            X86RMI_Mem(espLO), tLo ));
2657             addInstr(env, X86Instr_Alu32R( Xalu_MOV,
2658                                            X86RMI_Mem(espHI), tHi ));
2659             add_to_esp(env, 16);
2660             *rHi = tHi;
2661             *rLo = tLo;
2662             return;
2663          }
2664 
2665          /* could do better than this, but for now ... */
2666          case Iop_1Sto64: {
2667             HReg tLo = newVRegI(env);
2668             HReg tHi = newVRegI(env);
2669             X86CondCode cond = iselCondCode(env, e->Iex.Unop.arg);
2670             addInstr(env, X86Instr_Set32(cond,tLo));
2671             addInstr(env, X86Instr_Sh32(Xsh_SHL, 31, tLo));
2672             addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, tLo));
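                 /* Set32 leaves 0 or 1 in tLo (per the condition); the
                    shift left then arithmetic shift right by 31 smears bit
                    0 across the word, giving 0 or 0xFFFFFFFF, which is
                    then copied into the high half too. */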
2673             addInstr(env, mk_iMOVsd_RR(tLo, tHi));
2674             *rHi = tHi;
2675             *rLo = tLo;
2676             return;
2677          }
2678 
2679          /* Not64(e) */
2680          case Iop_Not64: {
2681             HReg tLo = newVRegI(env);
2682             HReg tHi = newVRegI(env);
2683             HReg sHi, sLo;
2684             iselInt64Expr(&sHi, &sLo, env, e->Iex.Unop.arg);
2685             addInstr(env, mk_iMOVsd_RR(sHi, tHi));
2686             addInstr(env, mk_iMOVsd_RR(sLo, tLo));
2687             addInstr(env, X86Instr_Unary32(Xun_NOT,tHi));
2688             addInstr(env, X86Instr_Unary32(Xun_NOT,tLo));
2689             *rHi = tHi;
2690             *rLo = tLo;
2691             return;
2692          }
2693 
2694          /* Left64(e) */
2695          case Iop_Left64: {
2696             HReg yLo, yHi;
2697             HReg tLo = newVRegI(env);
2698             HReg tHi = newVRegI(env);
2699             /* yHi:yLo = arg */
2700             iselInt64Expr(&yHi, &yLo, env, e->Iex.Unop.arg);
2701             /* tLo = 0 - yLo, and set carry */
2702             addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tLo));
2703             addInstr(env, X86Instr_Alu32R(Xalu_SUB, X86RMI_Reg(yLo), tLo));
2704             /* tHi = 0 - yHi - carry */
2705             addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tHi));
2706             addInstr(env, X86Instr_Alu32R(Xalu_SBB, X86RMI_Reg(yHi), tHi));
2707             /* So now we have tHi:tLo = -arg.  To finish off, or 'arg'
2708                back in, so as to give the final result
2709                tHi:tLo = arg | -arg. */
2710             addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Reg(yLo), tLo));
2711             addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Reg(yHi), tHi));
2712             *rHi = tHi;
2713             *rLo = tLo;
2714             return;
2715          }
2716 
2717          /* --- patterns rooted at: CmpwNEZ64 --- */
2718 
2719          /* CmpwNEZ64(e) */
2720          case Iop_CmpwNEZ64: {
2721 
2722          DECLARE_PATTERN(p_CmpwNEZ64_Or64);
2723          DEFINE_PATTERN(p_CmpwNEZ64_Or64,
2724                         unop(Iop_CmpwNEZ64,binop(Iop_Or64,bind(0),bind(1))));
2725          if (matchIRExpr(&mi, p_CmpwNEZ64_Or64, e)) {
2726             /* CmpwNEZ64(Or64(x,y)) */
2727             HReg xHi,xLo,yHi,yLo;
2728             HReg xBoth = newVRegI(env);
2729             HReg merged = newVRegI(env);
2730             HReg tmp2 = newVRegI(env);
2731 
2732             iselInt64Expr(&xHi,&xLo, env, mi.bindee[0]);
2733             addInstr(env, mk_iMOVsd_RR(xHi,xBoth));
2734             addInstr(env, X86Instr_Alu32R(Xalu_OR,
2735                                           X86RMI_Reg(xLo),xBoth));
2736 
2737             iselInt64Expr(&yHi,&yLo, env, mi.bindee[1]);
2738             addInstr(env, mk_iMOVsd_RR(yHi,merged));
2739             addInstr(env, X86Instr_Alu32R(Xalu_OR,
2740                                           X86RMI_Reg(yLo),merged));
2741             addInstr(env, X86Instr_Alu32R(Xalu_OR,
2742                                              X86RMI_Reg(xBoth),merged));
2743 
2744             /* tmp2 = (merged | -merged) >>s 31 */
2745             addInstr(env, mk_iMOVsd_RR(merged,tmp2));
2746             addInstr(env, X86Instr_Unary32(Xun_NEG,tmp2));
2747             addInstr(env, X86Instr_Alu32R(Xalu_OR,
2748                                           X86RMI_Reg(merged), tmp2));
2749             addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, tmp2));
2750             *rHi = tmp2;
2751             *rLo = tmp2;
2752             return;
2753          } else {
2754             /* CmpwNEZ64(e) */
2755             HReg srcLo, srcHi;
2756             HReg tmp1  = newVRegI(env);
2757             HReg tmp2  = newVRegI(env);
2758             /* srcHi:srcLo = arg */
2759             iselInt64Expr(&srcHi, &srcLo, env, e->Iex.Unop.arg);
2760             /* tmp1 = srcHi | srcLo */
2761             addInstr(env, mk_iMOVsd_RR(srcHi,tmp1));
2762             addInstr(env, X86Instr_Alu32R(Xalu_OR,
2763                                           X86RMI_Reg(srcLo), tmp1));
2764             /* tmp2 = (tmp1 | -tmp1) >>s 31 */
2765             addInstr(env, mk_iMOVsd_RR(tmp1,tmp2));
2766             addInstr(env, X86Instr_Unary32(Xun_NEG,tmp2));
2767             addInstr(env, X86Instr_Alu32R(Xalu_OR,
2768                                           X86RMI_Reg(tmp1), tmp2));
2769             addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, tmp2));
2770             *rHi = tmp2;
2771             *rLo = tmp2;
2772             return;
2773          }
2774          }
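
         /* Added note on the idiom used above: (x | -x) >>s 31 turns a
            zero/nonzero 32-bit value into an all-zeroes/all-ones mask
            without a branch.  If x == 0 then -x == 0, the OR is 0 and
            the arithmetic shift yields 0x00000000.  If x != 0 then at
            least one of x and -x has bit 31 set, so the OR has bit 31
            set and the shift yields 0xFFFFFFFF.  For instance, x = 1
            gives -x = 0xFFFFFFFF, x | -x = 0xFFFFFFFF, and the final
            result is 0xFFFFFFFF. */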
2775 
2776          /* ReinterpF64asI64(e) */
2777          /* Given an IEEE754 double, produce an I64 with the same bit
2778             pattern. */
2779          case Iop_ReinterpF64asI64: {
2780             HReg rf   = iselDblExpr(env, e->Iex.Unop.arg);
2781             HReg tLo  = newVRegI(env);
2782             HReg tHi  = newVRegI(env);
2783             X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
2784             X86AMode* four_esp = X86AMode_IR(4, hregX86_ESP());
2785             /* paranoia */
2786             set_FPU_rounding_default(env);
2787             /* subl $8, %esp */
2788             sub_from_esp(env, 8);
2789             /* gstD %rf, 0(%esp) */
2790             addInstr(env,
2791                      X86Instr_FpLdSt(False/*store*/, 8, rf, zero_esp));
2792             /* movl 0(%esp), %tLo */
2793             addInstr(env,
2794                      X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(zero_esp), tLo));
2795             /* movl 4(%esp), %tHi */
2796             addInstr(env,
2797                      X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(four_esp), tHi));
2798             /* addl $8, %esp */
2799             add_to_esp(env, 8);
2800             *rHi = tHi;
2801             *rLo = tLo;
2802             return;
2803          }
2804 
2805          case Iop_CmpNEZ32x2:
2806             fn = (HWord)h_generic_calc_CmpNEZ32x2; goto unish;
2807          case Iop_CmpNEZ16x4:
2808             fn = (HWord)h_generic_calc_CmpNEZ16x4; goto unish;
2809          case Iop_CmpNEZ8x8:
2810             fn = (HWord)h_generic_calc_CmpNEZ8x8; goto unish;
2811          unish: {
2812             /* Note: the following assumes that all helpers have the
2813                signature
2814                   ULong fn ( ULong )
2815                and that they are not marked as regparm functions.
2816             */
2817             HReg xLo, xHi;
2818             HReg tLo = newVRegI(env);
2819             HReg tHi = newVRegI(env);
2820             iselInt64Expr(&xHi, &xLo, env, e->Iex.Unop.arg);
2821             addInstr(env, X86Instr_Push(X86RMI_Reg(xHi)));
2822             addInstr(env, X86Instr_Push(X86RMI_Reg(xLo)));
2823             addInstr(env, X86Instr_Call( Xcc_ALWAYS, (Addr32)fn,
2824                                          0, mk_RetLoc_simple(RLPri_2Int) ));
2825             add_to_esp(env, 2*4);
2826             addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi));
2827             addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo));
2828             *rHi = tHi;
2829             *rLo = tLo;
2830             return;
2831          }
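
         /* Rough sketch (added comment) of what the 'unish' case emits;
            exact register assignments are left to the allocator:
               pushl %xHi
               pushl %xLo
               call  fn            ; plain cdecl call to ULong fn(ULong)
               addl  $8, %esp      ; pop the two argument words
               movl  %edx, %tHi    ; 64-bit result comes back in edx:eax
               movl  %eax, %tLo
         */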
2832 
2833          default:
2834             break;
2835       }
2836    } /* if (e->tag == Iex_Unop) */
2837 
2838 
2839    /* --------- CCALL --------- */
2840    if (e->tag == Iex_CCall) {
2841       HReg tLo = newVRegI(env);
2842       HReg tHi = newVRegI(env);
2843 
2844       /* Marshal args, do the call, clear stack. */
2845       UInt   addToSp = 0;
2846       RetLoc rloc    = mk_RetLoc_INVALID();
2847       doHelperCall( &addToSp, &rloc, env, NULL/*guard*/,
2848                     e->Iex.CCall.cee,
2849                     e->Iex.CCall.retty, e->Iex.CCall.args );
2850       vassert(is_sane_RetLoc(rloc));
2851       vassert(rloc.pri == RLPri_2Int);
2852       vassert(addToSp == 0);
2853       /* */
2854 
2855       addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi));
2856       addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo));
2857       *rHi = tHi;
2858       *rLo = tLo;
2859       return;
2860    }
2861 
2862    ppIRExpr(e);
2863    vpanic("iselInt64Expr");
2864 }
2865 
2866 
2867 /*---------------------------------------------------------*/
2868 /*--- ISEL: Floating point expressions (32 bit)         ---*/
2869 /*---------------------------------------------------------*/
2870 
2871 /* Nothing interesting here; really just wrappers for
2872    64-bit stuff. */
2873 
2874 static HReg iselFltExpr ( ISelEnv* env, IRExpr* e )
2875 {
2876    HReg r = iselFltExpr_wrk( env, e );
2877 #  if 0
2878    vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
2879 #  endif
2880    vassert(hregClass(r) == HRcFlt64); /* yes, really Flt64 */
2881    vassert(hregIsVirtual(r));
2882    return r;
2883 }
2884 
2885 /* DO NOT CALL THIS DIRECTLY */
2886 static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e )
2887 {
2888    IRType ty = typeOfIRExpr(env->type_env,e);
2889    vassert(ty == Ity_F32);
2890 
2891    if (e->tag == Iex_RdTmp) {
2892       return lookupIRTemp(env, e->Iex.RdTmp.tmp);
2893    }
2894 
2895    if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
2896       X86AMode* am;
2897       HReg res = newVRegF(env);
2898       vassert(e->Iex.Load.ty == Ity_F32);
2899       am = iselIntExpr_AMode(env, e->Iex.Load.addr);
2900       addInstr(env, X86Instr_FpLdSt(True/*load*/, 4, res, am));
2901       return res;
2902    }
2903 
2904    if (e->tag == Iex_Binop
2905        && e->Iex.Binop.op == Iop_F64toF32) {
2906       /* Although the result is still held in a standard FPU register,
2907          we need to round it to reflect the loss of accuracy/range
2908          entailed in casting it to a 32-bit float. */
2909       HReg dst = newVRegF(env);
2910       HReg src = iselDblExpr(env, e->Iex.Binop.arg2);
2911       set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
2912       addInstr(env, X86Instr_Fp64to32(src,dst));
2913       set_FPU_rounding_default( env );
2914       return dst;
2915    }
2916 
2917    if (e->tag == Iex_Get) {
2918       X86AMode* am = X86AMode_IR( e->Iex.Get.offset,
2919                                   hregX86_EBP() );
2920       HReg res = newVRegF(env);
2921       addInstr(env, X86Instr_FpLdSt( True/*load*/, 4, res, am ));
2922       return res;
2923    }
2924 
2925    if (e->tag == Iex_Unop
2926        && e->Iex.Unop.op == Iop_ReinterpI32asF32) {
2927        /* Given an I32, produce an IEEE754 float with the same bit
2928           pattern. */
2929       HReg    dst = newVRegF(env);
2930       X86RMI* rmi = iselIntExpr_RMI(env, e->Iex.Unop.arg);
2931       /* paranoia */
2932       addInstr(env, X86Instr_Push(rmi));
2933       addInstr(env, X86Instr_FpLdSt(
2934                        True/*load*/, 4, dst,
2935                        X86AMode_IR(0, hregX86_ESP())));
2936       add_to_esp(env, 4);
2937       return dst;
2938    }
2939 
2940    if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_RoundF32toInt) {
2941       HReg rf  = iselFltExpr(env, e->Iex.Binop.arg2);
2942       HReg dst = newVRegF(env);
2943 
2944       /* rf now holds the value to be rounded.  The first thing to do
2945          is set the FPU's rounding mode accordingly. */
2946 
2947       /* Set host rounding mode */
2948       set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
2949 
2950       /* grndint %rf, %dst */
2951       addInstr(env, X86Instr_FpUnary(Xfp_ROUND, rf, dst));
2952 
2953       /* Restore default FPU rounding. */
2954       set_FPU_rounding_default( env );
2955 
2956       return dst;
2957    }
2958 
2959    ppIRExpr(e);
2960    vpanic("iselFltExpr_wrk");
2961 }
2962 
2963 
2964 /*---------------------------------------------------------*/
2965 /*--- ISEL: Floating point expressions (64 bit)         ---*/
2966 /*---------------------------------------------------------*/
2967 
2968 /* Compute a 64-bit floating point value into a register, the identity
2969    of which is returned.  As with iselIntExpr_R, the reg may be either
2970    real or virtual; in any case it must not be changed by subsequent
2971    code emitted by the caller.  */
2972 
2973 /* IEEE 754 formats.  From http://www.freesoft.org/CIE/RFC/1832/32.htm:
2974 
2975     Type                  S (1 bit)   E (11 bits)   F (52 bits)
2976     ----                  ---------   -----------   -----------
2977     signalling NaN        u           2047 (max)    .0uuuuu---u
2978                                                     (with at least
2979                                                      one 1 bit)
2980     quiet NaN             u           2047 (max)    .1uuuuu---u
2981 
2982     negative infinity     1           2047 (max)    .000000---0
2983 
2984     positive infinity     0           2047 (max)    .000000---0
2985 
2986     negative zero         1           0             .000000---0
2987 
2988     positive zero         0           0             .000000---0
2989 */
2990 
2991 static HReg iselDblExpr ( ISelEnv* env, IRExpr* e )
2992 {
2993    HReg r = iselDblExpr_wrk( env, e );
2994 #  if 0
2995    vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
2996 #  endif
2997    vassert(hregClass(r) == HRcFlt64);
2998    vassert(hregIsVirtual(r));
2999    return r;
3000 }
3001 
3002 /* DO NOT CALL THIS DIRECTLY */
3003 static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e )
3004 {
3005    IRType ty = typeOfIRExpr(env->type_env,e);
3006    vassert(e);
3007    vassert(ty == Ity_F64);
3008 
3009    if (e->tag == Iex_RdTmp) {
3010       return lookupIRTemp(env, e->Iex.RdTmp.tmp);
3011    }
3012 
3013    if (e->tag == Iex_Const) {
3014       union { UInt u32x2[2]; ULong u64; Double f64; } u;
3015       HReg freg = newVRegF(env);
3016       vassert(sizeof(u) == 8);
3017       vassert(sizeof(u.u64) == 8);
3018       vassert(sizeof(u.f64) == 8);
3019       vassert(sizeof(u.u32x2) == 8);
3020 
3021       if (e->Iex.Const.con->tag == Ico_F64) {
3022          u.f64 = e->Iex.Const.con->Ico.F64;
3023       }
3024       else if (e->Iex.Const.con->tag == Ico_F64i) {
3025          u.u64 = e->Iex.Const.con->Ico.F64i;
3026       }
3027       else
3028          vpanic("iselDblExpr(x86): const");
3029 
3030       addInstr(env, X86Instr_Push(X86RMI_Imm(u.u32x2[1])));
3031       addInstr(env, X86Instr_Push(X86RMI_Imm(u.u32x2[0])));
3032       addInstr(env, X86Instr_FpLdSt(True/*load*/, 8, freg,
3033                                     X86AMode_IR(0, hregX86_ESP())));
3034       add_to_esp(env, 8);
3035       return freg;
3036    }
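
   /* Roughly, the constant case above turns into (illustrative only;
      the exact mnemonics depend on the emitter):
         pushl $hi32          ; high 32 bits of the constant
         pushl $lo32          ; low 32 bits, now at 0(%esp)
         fldl  0(%esp)        ; load the 8-byte image onto the x87 stack
         addl  $8, %esp
   */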
3037 
3038    if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
3039       X86AMode* am;
3040       HReg res = newVRegF(env);
3041       vassert(e->Iex.Load.ty == Ity_F64);
3042       am = iselIntExpr_AMode(env, e->Iex.Load.addr);
3043       addInstr(env, X86Instr_FpLdSt(True/*load*/, 8, res, am));
3044       return res;
3045    }
3046 
3047    if (e->tag == Iex_Get) {
3048       X86AMode* am = X86AMode_IR( e->Iex.Get.offset,
3049                                   hregX86_EBP() );
3050       HReg res = newVRegF(env);
3051       addInstr(env, X86Instr_FpLdSt( True/*load*/, 8, res, am ));
3052       return res;
3053    }
3054 
3055    if (e->tag == Iex_GetI) {
3056       X86AMode* am
3057          = genGuestArrayOffset(
3058               env, e->Iex.GetI.descr,
3059                    e->Iex.GetI.ix, e->Iex.GetI.bias );
3060       HReg res = newVRegF(env);
3061       addInstr(env, X86Instr_FpLdSt( True/*load*/, 8, res, am ));
3062       return res;
3063    }
3064 
3065    if (e->tag == Iex_Triop) {
3066       X86FpOp fpop = Xfp_INVALID;
3067       IRTriop *triop = e->Iex.Triop.details;
3068       switch (triop->op) {
3069          case Iop_AddF64:    fpop = Xfp_ADD; break;
3070          case Iop_SubF64:    fpop = Xfp_SUB; break;
3071          case Iop_MulF64:    fpop = Xfp_MUL; break;
3072          case Iop_DivF64:    fpop = Xfp_DIV; break;
3073          case Iop_ScaleF64:  fpop = Xfp_SCALE; break;
3074          case Iop_Yl2xF64:   fpop = Xfp_YL2X; break;
3075          case Iop_Yl2xp1F64: fpop = Xfp_YL2XP1; break;
3076          case Iop_AtanF64:   fpop = Xfp_ATAN; break;
3077          case Iop_PRemF64:   fpop = Xfp_PREM; break;
3078          case Iop_PRem1F64:  fpop = Xfp_PREM1; break;
3079          default: break;
3080       }
3081       if (fpop != Xfp_INVALID) {
3082          HReg res  = newVRegF(env);
3083          HReg srcL = iselDblExpr(env, triop->arg2);
3084          HReg srcR = iselDblExpr(env, triop->arg3);
3085          /* XXXROUNDINGFIXME */
3086          /* set roundingmode here */
3087          addInstr(env, X86Instr_FpBinary(fpop,srcL,srcR,res));
3088          if (fpop != Xfp_ADD && fpop != Xfp_SUB
3089              && fpop != Xfp_MUL && fpop != Xfp_DIV)
3090             roundToF64(env, res);
3091          return res;
3092       }
3093    }
3094 
3095    if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_RoundF64toInt) {
3096       HReg rf  = iselDblExpr(env, e->Iex.Binop.arg2);
3097       HReg dst = newVRegF(env);
3098 
3099       /* rf now holds the value to be rounded.  The first thing to do
3100          is set the FPU's rounding mode accordingly. */
3101 
3102       /* Set host rounding mode */
3103       set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
3104 
3105       /* grndint %rf, %dst */
3106       addInstr(env, X86Instr_FpUnary(Xfp_ROUND, rf, dst));
3107 
3108       /* Restore default FPU rounding. */
3109       set_FPU_rounding_default( env );
3110 
3111       return dst;
3112    }
3113 
3114    if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_I64StoF64) {
3115       HReg dst = newVRegF(env);
3116       HReg rHi,rLo;
3117       iselInt64Expr( &rHi, &rLo, env, e->Iex.Binop.arg2);
3118       addInstr(env, X86Instr_Push(X86RMI_Reg(rHi)));
3119       addInstr(env, X86Instr_Push(X86RMI_Reg(rLo)));
3120 
3121       /* Set host rounding mode */
3122       set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
3123 
3124       addInstr(env, X86Instr_FpLdStI(
3125                        True/*load*/, 8, dst,
3126                        X86AMode_IR(0, hregX86_ESP())));
3127 
3128       /* Restore default FPU rounding. */
3129       set_FPU_rounding_default( env );
3130 
3131       add_to_esp(env, 8);
3132       return dst;
3133    }
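
   /* Illustrative sketch of the Iop_I64StoF64 case above (not literal
      emitter output):
         pushl %rHi
         pushl %rLo
         ; switch the FPU rounding mode as requested by arg1
         fildll 0(%esp)       ; signed 64-bit int -> F64 on the x87 stack
         ; restore the default rounding mode
         addl  $8, %esp
   */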
3134 
3135    if (e->tag == Iex_Binop) {
3136       X86FpOp fpop = Xfp_INVALID;
3137       switch (e->Iex.Binop.op) {
3138          case Iop_SinF64:  fpop = Xfp_SIN; break;
3139          case Iop_CosF64:  fpop = Xfp_COS; break;
3140          case Iop_TanF64:  fpop = Xfp_TAN; break;
3141          case Iop_2xm1F64: fpop = Xfp_2XM1; break;
3142          case Iop_SqrtF64: fpop = Xfp_SQRT; break;
3143          default: break;
3144       }
3145       if (fpop != Xfp_INVALID) {
3146          HReg res = newVRegF(env);
3147          HReg src = iselDblExpr(env, e->Iex.Binop.arg2);
3148          /* XXXROUNDINGFIXME */
3149          /* set roundingmode here */
3150          /* Note that X86Instr_FpUnary(Xfp_TAN,..) sets the condition
3151             codes.  I don't think that matters, since this insn
3152             selector never generates such an instruction intervening
3153             between a flag-setting instruction and a flag-using
3154             instruction. */
3155          addInstr(env, X86Instr_FpUnary(fpop,src,res));
3156          if (fpop != Xfp_SQRT
3157              && fpop != Xfp_NEG && fpop != Xfp_ABS)
3158             roundToF64(env, res);
3159          return res;
3160       }
3161    }
3162 
3163    if (e->tag == Iex_Unop) {
3164       X86FpOp fpop = Xfp_INVALID;
3165       switch (e->Iex.Unop.op) {
3166          case Iop_NegF64:  fpop = Xfp_NEG; break;
3167          case Iop_AbsF64:  fpop = Xfp_ABS; break;
3168          default: break;
3169       }
3170       if (fpop != Xfp_INVALID) {
3171          HReg res = newVRegF(env);
3172          HReg src = iselDblExpr(env, e->Iex.Unop.arg);
3173          addInstr(env, X86Instr_FpUnary(fpop,src,res));
3174          /* No need to do roundToF64(env,res) for Xfp_NEG or Xfp_ABS,
3175             but might need to do that for other unary ops. */
3176          return res;
3177       }
3178    }
3179 
3180    if (e->tag == Iex_Unop) {
3181       switch (e->Iex.Unop.op) {
3182          case Iop_I32StoF64: {
3183             HReg dst = newVRegF(env);
3184             HReg ri  = iselIntExpr_R(env, e->Iex.Unop.arg);
3185             addInstr(env, X86Instr_Push(X86RMI_Reg(ri)));
3186             set_FPU_rounding_default(env);
3187             addInstr(env, X86Instr_FpLdStI(
3188                              True/*load*/, 4, dst,
3189                              X86AMode_IR(0, hregX86_ESP())));
3190             add_to_esp(env, 4);
3191             return dst;
3192          }
3193          case Iop_ReinterpI64asF64: {
3194             /* Given an I64, produce an IEEE754 double with the same
3195                bit pattern. */
3196             HReg dst = newVRegF(env);
3197             HReg rHi, rLo;
3198             iselInt64Expr( &rHi, &rLo, env, e->Iex.Unop.arg);
3199             /* paranoia */
3200             set_FPU_rounding_default(env);
3201             addInstr(env, X86Instr_Push(X86RMI_Reg(rHi)));
3202             addInstr(env, X86Instr_Push(X86RMI_Reg(rLo)));
3203             addInstr(env, X86Instr_FpLdSt(
3204                              True/*load*/, 8, dst,
3205                              X86AMode_IR(0, hregX86_ESP())));
3206             add_to_esp(env, 8);
3207             return dst;
3208          }
3209          case Iop_F32toF64: {
3210             /* this is a no-op */
3211             HReg res = iselFltExpr(env, e->Iex.Unop.arg);
3212             return res;
3213          }
3214          default:
3215             break;
3216       }
3217    }
3218 
3219    /* --------- MULTIPLEX --------- */
3220    if (e->tag == Iex_ITE) { // VFD
3221      if (ty == Ity_F64
3222          && typeOfIRExpr(env->type_env,e->Iex.ITE.cond) == Ity_I1) {
3223         HReg r1  = iselDblExpr(env, e->Iex.ITE.iftrue);
3224         HReg r0  = iselDblExpr(env, e->Iex.ITE.iffalse);
3225         HReg dst = newVRegF(env);
3226         addInstr(env, X86Instr_FpUnary(Xfp_MOV,r1,dst));
3227         X86CondCode cc = iselCondCode(env, e->Iex.ITE.cond);
3228         addInstr(env, X86Instr_FpCMov(cc ^ 1, r0, dst));
3229         return dst;
3230       }
3231    }
3232 
3233    ppIRExpr(e);
3234    vpanic("iselDblExpr_wrk");
3235 }
3236 
3237 
3238 /*---------------------------------------------------------*/
3239 /*--- ISEL: SIMD (Vector) expressions, 128 bit.         ---*/
3240 /*---------------------------------------------------------*/
3241 
3242 static HReg iselVecExpr ( ISelEnv* env, IRExpr* e )
3243 {
3244    HReg r = iselVecExpr_wrk( env, e );
3245 #  if 0
3246    vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
3247 #  endif
3248    vassert(hregClass(r) == HRcVec128);
3249    vassert(hregIsVirtual(r));
3250    return r;
3251 }
3252 
3253 
3254 /* DO NOT CALL THIS DIRECTLY */
3255 static HReg iselVecExpr_wrk ( ISelEnv* env, IRExpr* e )
3256 {
3257 
3258 #  define REQUIRE_SSE1                                    \
3259       do { if (env->hwcaps == 0/*baseline, no sse*/       \
3260                ||  env->hwcaps == VEX_HWCAPS_X86_MMXEXT /*Integer SSE*/) \
3261               goto vec_fail;                              \
3262       } while (0)
3263 
3264 #  define REQUIRE_SSE2                                    \
3265       do { if (0 == (env->hwcaps & VEX_HWCAPS_X86_SSE2))  \
3266               goto vec_fail;                              \
3267       } while (0)
3268 
3269 #  define SSE2_OR_ABOVE                                   \
3270        (env->hwcaps & VEX_HWCAPS_X86_SSE2)
3271 
3272    HWord     fn = 0; /* address of helper fn, if required */
3273    MatchInfo mi;
3274    Bool      arg1isEReg = False;
3275    X86SseOp  op = Xsse_INVALID;
3276    IRType    ty = typeOfIRExpr(env->type_env,e);
3277    vassert(e);
3278    vassert(ty == Ity_V128);
3279 
3280    REQUIRE_SSE1;
3281 
3282    if (e->tag == Iex_RdTmp) {
3283       return lookupIRTemp(env, e->Iex.RdTmp.tmp);
3284    }
3285 
3286    if (e->tag == Iex_Get) {
3287       HReg dst = newVRegV(env);
3288       addInstr(env, X86Instr_SseLdSt(
3289                        True/*load*/,
3290                        dst,
3291                        X86AMode_IR(e->Iex.Get.offset, hregX86_EBP())
3292                     )
3293               );
3294       return dst;
3295    }
3296 
3297    if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
3298       HReg      dst = newVRegV(env);
3299       X86AMode* am  = iselIntExpr_AMode(env, e->Iex.Load.addr);
3300       addInstr(env, X86Instr_SseLdSt( True/*load*/, dst, am ));
3301       return dst;
3302    }
3303 
3304    if (e->tag == Iex_Const) {
3305       HReg dst = newVRegV(env);
3306       vassert(e->Iex.Const.con->tag == Ico_V128);
3307       addInstr(env, X86Instr_SseConst(e->Iex.Const.con->Ico.V128, dst));
3308       return dst;
3309    }
3310 
3311    if (e->tag == Iex_Unop) {
3312 
3313    if (SSE2_OR_ABOVE) {
3314       /* 64UtoV128(LDle:I64(addr)) */
3315       DECLARE_PATTERN(p_zwiden_load64);
3316       DEFINE_PATTERN(p_zwiden_load64,
3317                      unop(Iop_64UtoV128,
3318                           IRExpr_Load(Iend_LE,Ity_I64,bind(0))));
3319       if (matchIRExpr(&mi, p_zwiden_load64, e)) {
3320          X86AMode* am = iselIntExpr_AMode(env, mi.bindee[0]);
3321          HReg dst = newVRegV(env);
3322          addInstr(env, X86Instr_SseLdzLO(8, dst, am));
3323          return dst;
3324       }
3325    }
3326 
3327    switch (e->Iex.Unop.op) {
3328 
3329       case Iop_NotV128: {
3330          HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
3331          return do_sse_Not128(env, arg);
3332       }
3333 
3334       case Iop_CmpNEZ64x2: {
3335          /* We can use SSE2 instructions for this. */
3336          /* Ideally, we want to do a 64Ix2 comparison against zero of
3337             the operand.  Problem is no such insn exists.  Solution
3338             therefore is to do a 32Ix4 comparison instead, and bitwise-
3339             negate (NOT) the result.  Let a,b,c,d be 32-bit lanes, and
3340             let the not'd result of this initial comparison be a:b:c:d.
3341             What we need to compute is (a|b):(a|b):(c|d):(c|d).  So, use
3342             pshufd to create a value b:a:d:c, and OR that with a:b:c:d,
3343             giving the required result.
3344 
3345             The required selection sequence is 2,3,0,1, which
3346             according to Intel's documentation means the pshufd
3347             literal value is 0xB1, that is,
3348             (2 << 6) | (3 << 4) | (0 << 2) | (1 << 0)
3349          */
3350          HReg arg  = iselVecExpr(env, e->Iex.Unop.arg);
3351          HReg tmp  = newVRegV(env);
3352          HReg dst  = newVRegV(env);
3353          REQUIRE_SSE2;
3354          addInstr(env, X86Instr_SseReRg(Xsse_XOR, tmp, tmp));
3355          addInstr(env, X86Instr_SseReRg(Xsse_CMPEQ32, arg, tmp));
3356          tmp = do_sse_Not128(env, tmp);
3357          addInstr(env, X86Instr_SseShuf(0xB1, tmp, dst));
3358          addInstr(env, X86Instr_SseReRg(Xsse_OR, tmp, dst));
3359          return dst;
3360       }
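
      /* Added worked example: if the not'd comparison result has 32-bit
         lanes a:b:c:d, each lane being all-ones iff the corresponding
         source lane was nonzero, then the 0xB1 shuffle produces b:a:d:c
         and the OR gives (a|b):(a|b):(c|d):(c|d).  Each 64-bit half is
         therefore all-ones iff either of its 32-bit halves was nonzero,
         which is exactly the CmpNEZ64x2 semantics. */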
3361 
3362       case Iop_CmpNEZ32x4: {
3363          /* Sigh, we have to generate lousy code since this has to
3364             work on SSE1 hosts */
3365          /* basically, the idea is: for each lane:
3366                movl lane, %r ; negl %r   (now CF = lane==0 ? 0 : 1)
3367                sbbl %r, %r               (now %r = 1Sto32(CF))
3368                movl %r, lane
3369          */
3370          Int       i;
3371          X86AMode* am;
3372          X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
3373          HReg      arg  = iselVecExpr(env, e->Iex.Unop.arg);
3374          HReg      dst  = newVRegV(env);
3375          HReg      r32  = newVRegI(env);
3376          sub_from_esp(env, 16);
3377          addInstr(env, X86Instr_SseLdSt(False/*store*/, arg, esp0));
3378          for (i = 0; i < 4; i++) {
3379             am = X86AMode_IR(i*4, hregX86_ESP());
3380             addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(am), r32));
3381             addInstr(env, X86Instr_Unary32(Xun_NEG, r32));
3382             addInstr(env, X86Instr_Alu32R(Xalu_SBB, X86RMI_Reg(r32), r32));
3383             addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(r32), am));
3384          }
3385          addInstr(env, X86Instr_SseLdSt(True/*load*/, dst, esp0));
3386          add_to_esp(env, 16);
3387          return dst;
3388       }
3389 
3390       case Iop_CmpNEZ8x16:
3391       case Iop_CmpNEZ16x8: {
3392          /* We can use SSE2 instructions for this. */
3393          HReg arg;
3394          HReg vec0 = newVRegV(env);
3395          HReg vec1 = newVRegV(env);
3396          HReg dst  = newVRegV(env);
3397          X86SseOp cmpOp
3398             = e->Iex.Unop.op==Iop_CmpNEZ16x8 ? Xsse_CMPEQ16
3399                                              : Xsse_CMPEQ8;
3400          REQUIRE_SSE2;
3401          addInstr(env, X86Instr_SseReRg(Xsse_XOR, vec0, vec0));
3402          addInstr(env, mk_vMOVsd_RR(vec0, vec1));
3403          addInstr(env, X86Instr_Sse32Fx4(Xsse_CMPEQF, vec1, vec1));
3404          /* defer arg computation to here so as to give CMPEQF as long
3405             as possible to complete */
3406          arg = iselVecExpr(env, e->Iex.Unop.arg);
3407          /* vec0 is all 0s; vec1 is all 1s */
3408          addInstr(env, mk_vMOVsd_RR(arg, dst));
3409          /* 16x8 or 8x16 comparison == */
3410          addInstr(env, X86Instr_SseReRg(cmpOp, vec0, dst));
3411          /* invert result */
3412          addInstr(env, X86Instr_SseReRg(Xsse_XOR, vec1, dst));
3413          return dst;
3414       }
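
      /* Added note: vec1 is made all-ones by comparing an all-zeroes
         register with itself using CMPEQF (equal everywhere), and the
         final XOR against that all-ones value is a bitwise NOT of the
         equality mask, turning "lane == 0" into "lane != 0". */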
3415 
3416       case Iop_RecipEst32Fx4: op = Xsse_RCPF;   goto do_32Fx4_unary;
3417       case Iop_RSqrtEst32Fx4: op = Xsse_RSQRTF; goto do_32Fx4_unary;
3418       do_32Fx4_unary:
3419       {
3420          HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
3421          HReg dst = newVRegV(env);
3422          addInstr(env, X86Instr_Sse32Fx4(op, arg, dst));
3423          return dst;
3424       }
3425 
3426       case Iop_RecipEst32F0x4: op = Xsse_RCPF;   goto do_32F0x4_unary;
3427       case Iop_RSqrtEst32F0x4: op = Xsse_RSQRTF; goto do_32F0x4_unary;
3428       case Iop_Sqrt32F0x4:     op = Xsse_SQRTF;  goto do_32F0x4_unary;
3429       do_32F0x4_unary:
3430       {
3431          /* A bit subtle.  We have to copy the arg to the result
3432             register first, because actually doing the SSE scalar insn
3433             leaves the upper 3/4 of the destination register
3434             unchanged.  Whereas the required semantics of these
3435             primops is that the upper 3/4 is simply copied in from the
3436             argument. */
3437          HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
3438          HReg dst = newVRegV(env);
3439          addInstr(env, mk_vMOVsd_RR(arg, dst));
3440          addInstr(env, X86Instr_Sse32FLo(op, arg, dst));
3441          return dst;
3442       }
3443 
3444       case Iop_Sqrt64F0x2:  op = Xsse_SQRTF;  goto do_64F0x2_unary;
3445       do_64F0x2_unary:
3446       {
3447          /* A bit subtle.  We have to copy the arg to the result
3448             register first, because actually doing the SSE scalar insn
3449             leaves the upper half of the destination register
3450             unchanged.  Whereas the required semantics of these
3451             primops is that the upper half is simply copied in from the
3452             argument. */
3453          HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
3454          HReg dst = newVRegV(env);
3455          REQUIRE_SSE2;
3456          addInstr(env, mk_vMOVsd_RR(arg, dst));
3457          addInstr(env, X86Instr_Sse64FLo(op, arg, dst));
3458          return dst;
3459       }
3460 
3461       case Iop_32UtoV128: {
3462          HReg      dst  = newVRegV(env);
3463          X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
3464          X86RMI*   rmi  = iselIntExpr_RMI(env, e->Iex.Unop.arg);
3465          addInstr(env, X86Instr_Push(rmi));
3466          addInstr(env, X86Instr_SseLdzLO(4, dst, esp0));
3467          add_to_esp(env, 4);
3468          return dst;
3469       }
3470 
3471       case Iop_64UtoV128: {
3472          HReg      rHi, rLo;
3473          HReg      dst  = newVRegV(env);
3474          X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
3475          iselInt64Expr(&rHi, &rLo, env, e->Iex.Unop.arg);
3476          addInstr(env, X86Instr_Push(X86RMI_Reg(rHi)));
3477          addInstr(env, X86Instr_Push(X86RMI_Reg(rLo)));
3478          addInstr(env, X86Instr_SseLdzLO(8, dst, esp0));
3479          add_to_esp(env, 8);
3480          return dst;
3481       }
3482 
3483       default:
3484          break;
3485    } /* switch (e->Iex.Unop.op) */
3486    } /* if (e->tag == Iex_Unop) */
3487 
3488    if (e->tag == Iex_Binop) {
3489    switch (e->Iex.Binop.op) {
3490 
3491       case Iop_Sqrt64Fx2:
3492          REQUIRE_SSE2;
3493          /* fallthrough */
3494       case Iop_Sqrt32Fx4: {
3495          /* :: (rmode, vec) -> vec */
3496          HReg arg = iselVecExpr(env, e->Iex.Binop.arg2);
3497          HReg dst = newVRegV(env);
3498          /* XXXROUNDINGFIXME */
3499          /* set roundingmode here */
3500          addInstr(env, (e->Iex.Binop.op == Iop_Sqrt64Fx2
3501                            ? X86Instr_Sse64Fx2 : X86Instr_Sse32Fx4)
3502                        (Xsse_SQRTF, arg, dst));
3503          return dst;
3504       }
3505 
3506       case Iop_SetV128lo32: {
3507          HReg dst = newVRegV(env);
3508          HReg srcV = iselVecExpr(env, e->Iex.Binop.arg1);
3509          HReg srcI = iselIntExpr_R(env, e->Iex.Binop.arg2);
3510          X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
3511          sub_from_esp(env, 16);
3512          addInstr(env, X86Instr_SseLdSt(False/*store*/, srcV, esp0));
3513          addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(srcI), esp0));
3514          addInstr(env, X86Instr_SseLdSt(True/*load*/, dst, esp0));
3515          add_to_esp(env, 16);
3516          return dst;
3517       }
3518 
3519       case Iop_SetV128lo64: {
3520          HReg dst = newVRegV(env);
3521          HReg srcV = iselVecExpr(env, e->Iex.Binop.arg1);
3522          HReg srcIhi, srcIlo;
3523          X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
3524          X86AMode* esp4 = advance4(esp0);
3525          iselInt64Expr(&srcIhi, &srcIlo, env, e->Iex.Binop.arg2);
3526          sub_from_esp(env, 16);
3527          addInstr(env, X86Instr_SseLdSt(False/*store*/, srcV, esp0));
3528          addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(srcIlo), esp0));
3529          addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(srcIhi), esp4));
3530          addInstr(env, X86Instr_SseLdSt(True/*load*/, dst, esp0));
3531          add_to_esp(env, 16);
3532          return dst;
3533       }
3534 
3535       case Iop_64HLtoV128: {
3536          HReg r3, r2, r1, r0;
3537          X86AMode* esp0  = X86AMode_IR(0, hregX86_ESP());
3538          X86AMode* esp4  = advance4(esp0);
3539          X86AMode* esp8  = advance4(esp4);
3540          X86AMode* esp12 = advance4(esp8);
3541          HReg dst = newVRegV(env);
3542          /* do this via the stack (easy, convenient, etc) */
3543          sub_from_esp(env, 16);
3544          /* Do the less significant 64 bits */
3545          iselInt64Expr(&r1, &r0, env, e->Iex.Binop.arg2);
3546          addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(r0), esp0));
3547          addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(r1), esp4));
3548          /* Do the more significant 64 bits */
3549          iselInt64Expr(&r3, &r2, env, e->Iex.Binop.arg1);
3550          addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(r2), esp8));
3551          addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(r3), esp12));
3552          /* Fetch result back from stack. */
3553          addInstr(env, X86Instr_SseLdSt(True/*load*/, dst, esp0));
3554          add_to_esp(env, 16);
3555          return dst;
3556       }
3557 
3558       case Iop_CmpEQ32Fx4: op = Xsse_CMPEQF; goto do_32Fx4;
3559       case Iop_CmpLT32Fx4: op = Xsse_CMPLTF; goto do_32Fx4;
3560       case Iop_CmpLE32Fx4: op = Xsse_CMPLEF; goto do_32Fx4;
3561       case Iop_CmpUN32Fx4: op = Xsse_CMPUNF; goto do_32Fx4;
3562       case Iop_Max32Fx4:   op = Xsse_MAXF;   goto do_32Fx4;
3563       case Iop_Min32Fx4:   op = Xsse_MINF;   goto do_32Fx4;
3564       do_32Fx4:
3565       {
3566          HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
3567          HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
3568          HReg dst = newVRegV(env);
3569          addInstr(env, mk_vMOVsd_RR(argL, dst));
3570          addInstr(env, X86Instr_Sse32Fx4(op, argR, dst));
3571          return dst;
3572       }
3573 
3574       case Iop_CmpEQ64Fx2: op = Xsse_CMPEQF; goto do_64Fx2;
3575       case Iop_CmpLT64Fx2: op = Xsse_CMPLTF; goto do_64Fx2;
3576       case Iop_CmpLE64Fx2: op = Xsse_CMPLEF; goto do_64Fx2;
3577       case Iop_CmpUN64Fx2: op = Xsse_CMPUNF; goto do_64Fx2;
3578       case Iop_Max64Fx2:   op = Xsse_MAXF;   goto do_64Fx2;
3579       case Iop_Min64Fx2:   op = Xsse_MINF;   goto do_64Fx2;
3580       do_64Fx2:
3581       {
3582          HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
3583          HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
3584          HReg dst = newVRegV(env);
3585          REQUIRE_SSE2;
3586          addInstr(env, mk_vMOVsd_RR(argL, dst));
3587          addInstr(env, X86Instr_Sse64Fx2(op, argR, dst));
3588          return dst;
3589       }
3590 
3591       case Iop_CmpEQ32F0x4: op = Xsse_CMPEQF; goto do_32F0x4;
3592       case Iop_CmpLT32F0x4: op = Xsse_CMPLTF; goto do_32F0x4;
3593       case Iop_CmpLE32F0x4: op = Xsse_CMPLEF; goto do_32F0x4;
3594       case Iop_CmpUN32F0x4: op = Xsse_CMPUNF; goto do_32F0x4;
3595       case Iop_Add32F0x4:   op = Xsse_ADDF;   goto do_32F0x4;
3596       case Iop_Div32F0x4:   op = Xsse_DIVF;   goto do_32F0x4;
3597       case Iop_Max32F0x4:   op = Xsse_MAXF;   goto do_32F0x4;
3598       case Iop_Min32F0x4:   op = Xsse_MINF;   goto do_32F0x4;
3599       case Iop_Mul32F0x4:   op = Xsse_MULF;   goto do_32F0x4;
3600       case Iop_Sub32F0x4:   op = Xsse_SUBF;   goto do_32F0x4;
3601       do_32F0x4: {
3602          HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
3603          HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
3604          HReg dst = newVRegV(env);
3605          addInstr(env, mk_vMOVsd_RR(argL, dst));
3606          addInstr(env, X86Instr_Sse32FLo(op, argR, dst));
3607          return dst;
3608       }
3609 
3610       case Iop_CmpEQ64F0x2: op = Xsse_CMPEQF; goto do_64F0x2;
3611       case Iop_CmpLT64F0x2: op = Xsse_CMPLTF; goto do_64F0x2;
3612       case Iop_CmpLE64F0x2: op = Xsse_CMPLEF; goto do_64F0x2;
3613       case Iop_CmpUN64F0x2: op = Xsse_CMPUNF; goto do_64F0x2;
3614       case Iop_Add64F0x2:   op = Xsse_ADDF;   goto do_64F0x2;
3615       case Iop_Div64F0x2:   op = Xsse_DIVF;   goto do_64F0x2;
3616       case Iop_Max64F0x2:   op = Xsse_MAXF;   goto do_64F0x2;
3617       case Iop_Min64F0x2:   op = Xsse_MINF;   goto do_64F0x2;
3618       case Iop_Mul64F0x2:   op = Xsse_MULF;   goto do_64F0x2;
3619       case Iop_Sub64F0x2:   op = Xsse_SUBF;   goto do_64F0x2;
3620       do_64F0x2: {
3621          HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
3622          HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
3623          HReg dst = newVRegV(env);
3624          REQUIRE_SSE2;
3625          addInstr(env, mk_vMOVsd_RR(argL, dst));
3626          addInstr(env, X86Instr_Sse64FLo(op, argR, dst));
3627          return dst;
3628       }
3629 
3630       case Iop_QNarrowBin32Sto16Sx8:
3631          op = Xsse_PACKSSD; arg1isEReg = True; goto do_SseReRg;
3632       case Iop_QNarrowBin16Sto8Sx16:
3633          op = Xsse_PACKSSW; arg1isEReg = True; goto do_SseReRg;
3634       case Iop_QNarrowBin16Sto8Ux16:
3635          op = Xsse_PACKUSW; arg1isEReg = True; goto do_SseReRg;
3636 
3637       case Iop_InterleaveHI8x16:
3638          op = Xsse_UNPCKHB; arg1isEReg = True; goto do_SseReRg;
3639       case Iop_InterleaveHI16x8:
3640          op = Xsse_UNPCKHW; arg1isEReg = True; goto do_SseReRg;
3641       case Iop_InterleaveHI32x4:
3642          op = Xsse_UNPCKHD; arg1isEReg = True; goto do_SseReRg;
3643       case Iop_InterleaveHI64x2:
3644          op = Xsse_UNPCKHQ; arg1isEReg = True; goto do_SseReRg;
3645 
3646       case Iop_InterleaveLO8x16:
3647          op = Xsse_UNPCKLB; arg1isEReg = True; goto do_SseReRg;
3648       case Iop_InterleaveLO16x8:
3649          op = Xsse_UNPCKLW; arg1isEReg = True; goto do_SseReRg;
3650       case Iop_InterleaveLO32x4:
3651          op = Xsse_UNPCKLD; arg1isEReg = True; goto do_SseReRg;
3652       case Iop_InterleaveLO64x2:
3653          op = Xsse_UNPCKLQ; arg1isEReg = True; goto do_SseReRg;
3654 
3655       case Iop_AndV128:    op = Xsse_AND;      goto do_SseReRg;
3656       case Iop_OrV128:     op = Xsse_OR;       goto do_SseReRg;
3657       case Iop_XorV128:    op = Xsse_XOR;      goto do_SseReRg;
3658       case Iop_Add8x16:    op = Xsse_ADD8;     goto do_SseReRg;
3659       case Iop_Add16x8:    op = Xsse_ADD16;    goto do_SseReRg;
3660       case Iop_Add32x4:    op = Xsse_ADD32;    goto do_SseReRg;
3661       case Iop_Add64x2:    op = Xsse_ADD64;    goto do_SseReRg;
3662       case Iop_QAdd8Sx16:  op = Xsse_QADD8S;   goto do_SseReRg;
3663       case Iop_QAdd16Sx8:  op = Xsse_QADD16S;  goto do_SseReRg;
3664       case Iop_QAdd8Ux16:  op = Xsse_QADD8U;   goto do_SseReRg;
3665       case Iop_QAdd16Ux8:  op = Xsse_QADD16U;  goto do_SseReRg;
3666       case Iop_Avg8Ux16:   op = Xsse_AVG8U;    goto do_SseReRg;
3667       case Iop_Avg16Ux8:   op = Xsse_AVG16U;   goto do_SseReRg;
3668       case Iop_CmpEQ8x16:  op = Xsse_CMPEQ8;   goto do_SseReRg;
3669       case Iop_CmpEQ16x8:  op = Xsse_CMPEQ16;  goto do_SseReRg;
3670       case Iop_CmpEQ32x4:  op = Xsse_CMPEQ32;  goto do_SseReRg;
3671       case Iop_CmpGT8Sx16: op = Xsse_CMPGT8S;  goto do_SseReRg;
3672       case Iop_CmpGT16Sx8: op = Xsse_CMPGT16S; goto do_SseReRg;
3673       case Iop_CmpGT32Sx4: op = Xsse_CMPGT32S; goto do_SseReRg;
3674       case Iop_Max16Sx8:   op = Xsse_MAX16S;   goto do_SseReRg;
3675       case Iop_Max8Ux16:   op = Xsse_MAX8U;    goto do_SseReRg;
3676       case Iop_Min16Sx8:   op = Xsse_MIN16S;   goto do_SseReRg;
3677       case Iop_Min8Ux16:   op = Xsse_MIN8U;    goto do_SseReRg;
3678       case Iop_MulHi16Ux8: op = Xsse_MULHI16U; goto do_SseReRg;
3679       case Iop_MulHi16Sx8: op = Xsse_MULHI16S; goto do_SseReRg;
3680       case Iop_Mul16x8:    op = Xsse_MUL16;    goto do_SseReRg;
3681       case Iop_Sub8x16:    op = Xsse_SUB8;     goto do_SseReRg;
3682       case Iop_Sub16x8:    op = Xsse_SUB16;    goto do_SseReRg;
3683       case Iop_Sub32x4:    op = Xsse_SUB32;    goto do_SseReRg;
3684       case Iop_Sub64x2:    op = Xsse_SUB64;    goto do_SseReRg;
3685       case Iop_QSub8Sx16:  op = Xsse_QSUB8S;   goto do_SseReRg;
3686       case Iop_QSub16Sx8:  op = Xsse_QSUB16S;  goto do_SseReRg;
3687       case Iop_QSub8Ux16:  op = Xsse_QSUB8U;   goto do_SseReRg;
3688       case Iop_QSub16Ux8:  op = Xsse_QSUB16U;  goto do_SseReRg;
3689       do_SseReRg: {
3690          HReg arg1 = iselVecExpr(env, e->Iex.Binop.arg1);
3691          HReg arg2 = iselVecExpr(env, e->Iex.Binop.arg2);
3692          HReg dst = newVRegV(env);
3693          if (op != Xsse_OR && op != Xsse_AND && op != Xsse_XOR)
3694             REQUIRE_SSE2;
3695          if (arg1isEReg) {
3696             addInstr(env, mk_vMOVsd_RR(arg2, dst));
3697             addInstr(env, X86Instr_SseReRg(op, arg1, dst));
3698          } else {
3699             addInstr(env, mk_vMOVsd_RR(arg1, dst));
3700             addInstr(env, X86Instr_SseReRg(op, arg2, dst));
3701          }
3702          return dst;
3703       }
3704 
3705       case Iop_ShlN16x8: op = Xsse_SHL16; goto do_SseShift;
3706       case Iop_ShlN32x4: op = Xsse_SHL32; goto do_SseShift;
3707       case Iop_ShlN64x2: op = Xsse_SHL64; goto do_SseShift;
3708       case Iop_SarN16x8: op = Xsse_SAR16; goto do_SseShift;
3709       case Iop_SarN32x4: op = Xsse_SAR32; goto do_SseShift;
3710       case Iop_ShrN16x8: op = Xsse_SHR16; goto do_SseShift;
3711       case Iop_ShrN32x4: op = Xsse_SHR32; goto do_SseShift;
3712       case Iop_ShrN64x2: op = Xsse_SHR64; goto do_SseShift;
3713       do_SseShift: {
3714          HReg      greg = iselVecExpr(env, e->Iex.Binop.arg1);
3715          X86RMI*   rmi  = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
3716          X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
3717          HReg      ereg = newVRegV(env);
3718          HReg      dst  = newVRegV(env);
3719          REQUIRE_SSE2;
3720          addInstr(env, X86Instr_Push(X86RMI_Imm(0)));
3721          addInstr(env, X86Instr_Push(X86RMI_Imm(0)));
3722          addInstr(env, X86Instr_Push(X86RMI_Imm(0)));
3723          addInstr(env, X86Instr_Push(rmi));
3724          addInstr(env, X86Instr_SseLdSt(True/*load*/, ereg, esp0));
3725          addInstr(env, mk_vMOVsd_RR(greg, dst));
3726          addInstr(env, X86Instr_SseReRg(op, ereg, dst));
3727          add_to_esp(env, 16);
3728          return dst;
3729       }
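
      /* Added note: the four pushes above build a 128-bit value on the
         stack -- the 32-bit shift count in the low 32 bits, with zeroes
         above -- because the SSE2 shift-by-register forms (psllw/pslld/
         psllq and friends) take the count from the low 64 bits of an
         xmm register.  That value is loaded into 'ereg', the operand is
         copied into 'dst', and the shift is then applied to 'dst'. */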
3730 
3731       case Iop_NarrowBin32to16x8:
3732          fn = (HWord)h_generic_calc_NarrowBin32to16x8;
3733          goto do_SseAssistedBinary;
3734       case Iop_NarrowBin16to8x16:
3735          fn = (HWord)h_generic_calc_NarrowBin16to8x16;
3736          goto do_SseAssistedBinary;
3737       do_SseAssistedBinary: {
3738          /* As with the amd64 case (where this is copied from) we
3739             generate pretty bad code. */
3740          vassert(fn != 0);
3741          HReg dst = newVRegV(env);
3742          HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
3743          HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
3744          HReg argp = newVRegI(env);
3745          /* subl $112, %esp         -- make a space */
3746          sub_from_esp(env, 112);
3747          /* leal 48(%esp), %r_argp  -- point into it */
3748          addInstr(env, X86Instr_Lea32(X86AMode_IR(48, hregX86_ESP()),
3749                                       argp));
3750          /* andl $-16, %r_argp      -- 16-align the pointer */
3751          addInstr(env, X86Instr_Alu32R(Xalu_AND,
3752                                        X86RMI_Imm( ~(UInt)15 ),
3753                                        argp));
3754          /* Prepare 3 arg regs:
3755             leal  0(%r_argp), %eax
3756             leal 16(%r_argp), %edx
3757             leal 32(%r_argp), %ecx
3758          */
3759          addInstr(env, X86Instr_Lea32(X86AMode_IR(0, argp),
3760                                       hregX86_EAX()));
3761          addInstr(env, X86Instr_Lea32(X86AMode_IR(16, argp),
3762                                       hregX86_EDX()));
3763          addInstr(env, X86Instr_Lea32(X86AMode_IR(32, argp),
3764                                       hregX86_ECX()));
3765          /* Store the two args, at (%edx) and (%ecx):
3766             movupd  %argL, 0(%edx)
3767             movupd  %argR, 0(%ecx)
3768          */
3769          addInstr(env, X86Instr_SseLdSt(False/*!isLoad*/, argL,
3770                                         X86AMode_IR(0, hregX86_EDX())));
3771          addInstr(env, X86Instr_SseLdSt(False/*!isLoad*/, argR,
3772                                         X86AMode_IR(0, hregX86_ECX())));
3773          /* call the helper */
3774          addInstr(env, X86Instr_Call( Xcc_ALWAYS, (Addr32)fn,
3775                                       3, mk_RetLoc_simple(RLPri_None) ));
3776          /* fetch the result from memory, using %r_argp, which the
3777             register allocator will keep alive across the call. */
3778          addInstr(env, X86Instr_SseLdSt(True/*isLoad*/, dst,
3779                                         X86AMode_IR(0, argp)));
3780          /* and finally, clear the space */
3781          add_to_esp(env, 112);
3782          return dst;
3783       }
3784 
3785       default:
3786          break;
3787    } /* switch (e->Iex.Binop.op) */
3788    } /* if (e->tag == Iex_Binop) */
3789 
3790 
3791    if (e->tag == Iex_Triop) {
3792    IRTriop *triop = e->Iex.Triop.details;
3793    switch (triop->op) {
3794 
3795       case Iop_Add32Fx4: op = Xsse_ADDF; goto do_32Fx4_w_rm;
3796       case Iop_Sub32Fx4: op = Xsse_SUBF; goto do_32Fx4_w_rm;
3797       case Iop_Mul32Fx4: op = Xsse_MULF; goto do_32Fx4_w_rm;
3798       case Iop_Div32Fx4: op = Xsse_DIVF; goto do_32Fx4_w_rm;
3799       do_32Fx4_w_rm:
3800       {
3801          HReg argL = iselVecExpr(env, triop->arg2);
3802          HReg argR = iselVecExpr(env, triop->arg3);
3803          HReg dst = newVRegV(env);
3804          addInstr(env, mk_vMOVsd_RR(argL, dst));
3805          /* XXXROUNDINGFIXME */
3806          /* set roundingmode here */
3807          addInstr(env, X86Instr_Sse32Fx4(op, argR, dst));
3808          return dst;
3809       }
3810 
3811       case Iop_Add64Fx2: op = Xsse_ADDF; goto do_64Fx2_w_rm;
3812       case Iop_Sub64Fx2: op = Xsse_SUBF; goto do_64Fx2_w_rm;
3813       case Iop_Mul64Fx2: op = Xsse_MULF; goto do_64Fx2_w_rm;
3814       case Iop_Div64Fx2: op = Xsse_DIVF; goto do_64Fx2_w_rm;
3815       do_64Fx2_w_rm:
3816       {
3817          HReg argL = iselVecExpr(env, triop->arg2);
3818          HReg argR = iselVecExpr(env, triop->arg3);
3819          HReg dst = newVRegV(env);
3820          REQUIRE_SSE2;
3821          addInstr(env, mk_vMOVsd_RR(argL, dst));
3822          /* XXXROUNDINGFIXME */
3823          /* set roundingmode here */
3824          addInstr(env, X86Instr_Sse64Fx2(op, argR, dst));
3825          return dst;
3826       }
3827 
3828       default:
3829          break;
3830    } /* switch (triop->op) */
3831    } /* if (e->tag == Iex_Triop) */
3832 
3833 
3834    if (e->tag == Iex_ITE) { // VFD
3835       HReg r1  = iselVecExpr(env, e->Iex.ITE.iftrue);
3836       HReg r0  = iselVecExpr(env, e->Iex.ITE.iffalse);
3837       HReg dst = newVRegV(env);
3838       addInstr(env, mk_vMOVsd_RR(r1,dst));
3839       X86CondCode cc = iselCondCode(env, e->Iex.ITE.cond);
3840       addInstr(env, X86Instr_SseCMov(cc ^ 1, r0, dst));
3841       return dst;
3842    }
3843 
3844    vec_fail:
3845    vex_printf("iselVecExpr (hwcaps = %s): can't reduce\n",
3846               LibVEX_ppVexHwCaps(VexArchX86,env->hwcaps));
3847    ppIRExpr(e);
3848    vpanic("iselVecExpr_wrk");
3849 
3850 #  undef REQUIRE_SSE1
3851 #  undef REQUIRE_SSE2
3852 #  undef SSE2_OR_ABOVE
3853 }
3854 
3855 
3856 /*---------------------------------------------------------*/
3857 /*--- ISEL: Statements                                  ---*/
3858 /*---------------------------------------------------------*/
3859 
3860 static void iselStmt ( ISelEnv* env, IRStmt* stmt )
3861 {
3862    if (vex_traceflags & VEX_TRACE_VCODE) {
3863       vex_printf("\n-- ");
3864       ppIRStmt(stmt);
3865       vex_printf("\n");
3866    }
3867 
3868    switch (stmt->tag) {
3869 
3870    /* --------- STORE --------- */
3871    case Ist_Store: {
3872       IRType    tya   = typeOfIRExpr(env->type_env, stmt->Ist.Store.addr);
3873       IRType    tyd   = typeOfIRExpr(env->type_env, stmt->Ist.Store.data);
3874       IREndness end   = stmt->Ist.Store.end;
3875 
3876       if (tya != Ity_I32 || end != Iend_LE)
3877          goto stmt_fail;
3878 
3879       if (tyd == Ity_I32) {
3880          X86AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
3881          X86RI* ri = iselIntExpr_RI(env, stmt->Ist.Store.data);
3882          addInstr(env, X86Instr_Alu32M(Xalu_MOV,ri,am));
3883          return;
3884       }
3885       if (tyd == Ity_I8 || tyd == Ity_I16) {
3886          X86AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
3887          HReg r = iselIntExpr_R(env, stmt->Ist.Store.data);
3888          addInstr(env, X86Instr_Store( toUChar(tyd==Ity_I8 ? 1 : 2),
3889                                        r,am ));
3890          return;
3891       }
3892       if (tyd == Ity_F64) {
3893          X86AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
3894          HReg r = iselDblExpr(env, stmt->Ist.Store.data);
3895          addInstr(env, X86Instr_FpLdSt(False/*store*/, 8, r, am));
3896          return;
3897       }
3898       if (tyd == Ity_F32) {
3899          X86AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
3900          HReg r = iselFltExpr(env, stmt->Ist.Store.data);
3901          addInstr(env, X86Instr_FpLdSt(False/*store*/, 4, r, am));
3902          return;
3903       }
3904       if (tyd == Ity_I64) {
3905          HReg vHi, vLo, rA;
3906          iselInt64Expr(&vHi, &vLo, env, stmt->Ist.Store.data);
3907          rA = iselIntExpr_R(env, stmt->Ist.Store.addr);
3908          addInstr(env, X86Instr_Alu32M(
3909                           Xalu_MOV, X86RI_Reg(vLo), X86AMode_IR(0, rA)));
3910          addInstr(env, X86Instr_Alu32M(
3911                           Xalu_MOV, X86RI_Reg(vHi), X86AMode_IR(4, rA)));
3912          return;
3913       }
3914       if (tyd == Ity_V128) {
3915          X86AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
3916          HReg r = iselVecExpr(env, stmt->Ist.Store.data);
3917          addInstr(env, X86Instr_SseLdSt(False/*store*/, r, am));
3918          return;
3919       }
3920       break;
3921    }
3922 
3923    /* --------- PUT --------- */
3924    case Ist_Put: {
3925       IRType ty = typeOfIRExpr(env->type_env, stmt->Ist.Put.data);
3926       if (ty == Ity_I32) {
3927          /* We're going to write to memory, so compute the RHS into an
3928             X86RI. */
3929          X86RI* ri = iselIntExpr_RI(env, stmt->Ist.Put.data);
3930          addInstr(env,
3931                   X86Instr_Alu32M(
3932                      Xalu_MOV,
3933                      ri,
3934                      X86AMode_IR(stmt->Ist.Put.offset,hregX86_EBP())
3935                  ));
3936          return;
3937       }
3938       if (ty == Ity_I8 || ty == Ity_I16) {
3939          HReg r = iselIntExpr_R(env, stmt->Ist.Put.data);
3940          addInstr(env, X86Instr_Store(
3941                           toUChar(ty==Ity_I8 ? 1 : 2),
3942                           r,
3943                           X86AMode_IR(stmt->Ist.Put.offset,
3944                                       hregX86_EBP())));
3945          return;
3946       }
3947       if (ty == Ity_I64) {
3948          HReg vHi, vLo;
3949          X86AMode* am  = X86AMode_IR(stmt->Ist.Put.offset, hregX86_EBP());
3950          X86AMode* am4 = advance4(am);
3951          iselInt64Expr(&vHi, &vLo, env, stmt->Ist.Put.data);
3952          addInstr(env, X86Instr_Alu32M( Xalu_MOV, X86RI_Reg(vLo), am ));
3953          addInstr(env, X86Instr_Alu32M( Xalu_MOV, X86RI_Reg(vHi), am4 ));
3954          return;
3955       }
3956       if (ty == Ity_V128) {
3957          HReg      vec = iselVecExpr(env, stmt->Ist.Put.data);
3958          X86AMode* am  = X86AMode_IR(stmt->Ist.Put.offset, hregX86_EBP());
3959          addInstr(env, X86Instr_SseLdSt(False/*store*/, vec, am));
3960          return;
3961       }
3962       if (ty == Ity_F32) {
3963          HReg f32 = iselFltExpr(env, stmt->Ist.Put.data);
3964          X86AMode* am  = X86AMode_IR(stmt->Ist.Put.offset, hregX86_EBP());
3965          set_FPU_rounding_default(env); /* paranoia */
3966          addInstr(env, X86Instr_FpLdSt( False/*store*/, 4, f32, am ));
3967          return;
3968       }
3969       if (ty == Ity_F64) {
3970          HReg f64 = iselDblExpr(env, stmt->Ist.Put.data);
3971          X86AMode* am  = X86AMode_IR(stmt->Ist.Put.offset, hregX86_EBP());
3972          set_FPU_rounding_default(env); /* paranoia */
3973          addInstr(env, X86Instr_FpLdSt( False/*store*/, 8, f64, am ));
3974          return;
3975       }
3976       break;
3977    }
3978 
3979    /* --------- Indexed PUT --------- */
3980    case Ist_PutI: {
3981       IRPutI *puti = stmt->Ist.PutI.details;
3982 
3983       X86AMode* am
3984          = genGuestArrayOffset(
3985               env, puti->descr,
3986                    puti->ix, puti->bias );
3987 
3988       IRType ty = typeOfIRExpr(env->type_env, puti->data);
3989       if (ty == Ity_F64) {
3990          HReg val = iselDblExpr(env, puti->data);
3991          addInstr(env, X86Instr_FpLdSt( False/*store*/, 8, val, am ));
3992          return;
3993       }
3994       if (ty == Ity_I8) {
3995          HReg r = iselIntExpr_R(env, puti->data);
3996          addInstr(env, X86Instr_Store( 1, r, am ));
3997          return;
3998       }
3999       if (ty == Ity_I32) {
4000          HReg r = iselIntExpr_R(env, puti->data);
4001          addInstr(env, X86Instr_Alu32M( Xalu_MOV, X86RI_Reg(r), am ));
4002          return;
4003       }
4004       if (ty == Ity_I64) {
4005          HReg rHi, rLo;
4006          X86AMode* am4 = advance4(am);
4007          iselInt64Expr(&rHi, &rLo, env, puti->data);
4008          addInstr(env, X86Instr_Alu32M( Xalu_MOV, X86RI_Reg(rLo), am ));
4009          addInstr(env, X86Instr_Alu32M( Xalu_MOV, X86RI_Reg(rHi), am4 ));
4010          return;
4011       }
4012       break;
4013    }
4014 
4015    /* --------- TMP --------- */
4016    case Ist_WrTmp: {
4017       IRTemp tmp = stmt->Ist.WrTmp.tmp;
4018       IRType ty = typeOfIRTemp(env->type_env, tmp);
4019 
4020       /* optimisation: if stmt->Ist.WrTmp.data is Add32(..,..),
4021          compute it into an AMode and then use LEA.  This usually
4022          produces fewer instructions, often because (for
4023          Memcheck-created IR) we get t = address-expression, with t
4024          used twice later, so doing this naturally turns the address
4025          expression back into an X86 amode. */
4026       if (ty == Ity_I32
4027           && stmt->Ist.WrTmp.data->tag == Iex_Binop
4028           && stmt->Ist.WrTmp.data->Iex.Binop.op == Iop_Add32) {
4029          X86AMode* am = iselIntExpr_AMode(env, stmt->Ist.WrTmp.data);
4030          HReg dst = lookupIRTemp(env, tmp);
4031          if (am->tag == Xam_IR && am->Xam.IR.imm == 0) {
4032             /* Hmm, iselIntExpr_AMode wimped out and just computed the
4033                value into a register.  Just emit a normal reg-reg move
4034                so reg-alloc can coalesce it away in the usual way. */
4035             HReg src = am->Xam.IR.reg;
4036             addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Reg(src), dst));
4037          } else {
4038             addInstr(env, X86Instr_Lea32(am,dst));
4039          }
4040          return;
4041       }
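
      /* Hypothetical example of the effect: for
            t = Add32(r_base, Shl32(r_index, 2))
         iselIntExpr_AMode can fold the whole right-hand side into a
         single amode, so the assignment becomes one
            leal 0(%r_base,%r_index,4), %t
         rather than a separate shift followed by an add.  (r_base,
         r_index and t are placeholder names.) */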
4042 
4043       if (ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8) {
4044          X86RMI* rmi = iselIntExpr_RMI(env, stmt->Ist.WrTmp.data);
4045          HReg dst = lookupIRTemp(env, tmp);
4046          addInstr(env, X86Instr_Alu32R(Xalu_MOV,rmi,dst));
4047          return;
4048       }
4049       if (ty == Ity_I64) {
4050          HReg rHi, rLo, dstHi, dstLo;
4051          iselInt64Expr(&rHi,&rLo, env, stmt->Ist.WrTmp.data);
4052          lookupIRTemp64( &dstHi, &dstLo, env, tmp);
4053          addInstr(env, mk_iMOVsd_RR(rHi,dstHi) );
4054          addInstr(env, mk_iMOVsd_RR(rLo,dstLo) );
4055          return;
4056       }
4057       if (ty == Ity_I1) {
4058          X86CondCode cond = iselCondCode(env, stmt->Ist.WrTmp.data);
4059          HReg dst = lookupIRTemp(env, tmp);
4060          addInstr(env, X86Instr_Set32(cond, dst));
4061          return;
4062       }
      if (ty == Ity_F64) {
         HReg dst = lookupIRTemp(env, tmp);
         HReg src = iselDblExpr(env, stmt->Ist.WrTmp.data);
         addInstr(env, X86Instr_FpUnary(Xfp_MOV,src,dst));
         return;
      }
      if (ty == Ity_F32) {
         HReg dst = lookupIRTemp(env, tmp);
         HReg src = iselFltExpr(env, stmt->Ist.WrTmp.data);
         addInstr(env, X86Instr_FpUnary(Xfp_MOV,src,dst));
         return;
      }
      if (ty == Ity_V128) {
         HReg dst = lookupIRTemp(env, tmp);
         HReg src = iselVecExpr(env, stmt->Ist.WrTmp.data);
         addInstr(env, mk_vMOVsd_RR(src,dst));
         return;
      }
      break;
   }

   /* --------- Call to DIRTY helper --------- */
   case Ist_Dirty: {
      IRDirty* d = stmt->Ist.Dirty.details;

      /* Figure out the return type, if any. */
      IRType retty = Ity_INVALID;
      if (d->tmp != IRTemp_INVALID)
         retty = typeOfIRTemp(env->type_env, d->tmp);

      Bool retty_ok = False;
      switch (retty) {
         case Ity_INVALID: /* function doesn't return anything */
         case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8:
         case Ity_V128:
            retty_ok = True; break;
         default:
            break;
      }
      if (!retty_ok)
         break; /* will go to stmt_fail: */
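      /* The types accepted above are exactly the ones the code below
         knows how to receive: small ints in %eax, Ity_I64 in
         %edx:%eax, and Ity_V128 via a stack slot arranged by
         doHelperCall.  Anything else (floating point results, for
         instance) has no agreed return location here, so we bail. */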

      /* Marshal args, do the call, and set the return value to
         0x555..555 if this is a conditional call that returns a value
         and the call is skipped. */
      UInt   addToSp = 0;
      RetLoc rloc    = mk_RetLoc_INVALID();
      doHelperCall( &addToSp, &rloc, env, d->guard, d->cee, retty, d->args );
      vassert(is_sane_RetLoc(rloc));

      /* Now figure out what to do with the returned value, if any. */
      switch (retty) {
         case Ity_INVALID: {
            /* No return value.  Nothing to do. */
            vassert(d->tmp == IRTemp_INVALID);
            vassert(rloc.pri == RLPri_None);
            vassert(addToSp == 0);
            return;
         }
         case Ity_I32: case Ity_I16: case Ity_I8: {
            /* The returned value is in %eax.  Park it in the register
               associated with tmp. */
            vassert(rloc.pri == RLPri_Int);
            vassert(addToSp == 0);
            HReg dst = lookupIRTemp(env, d->tmp);
            addInstr(env, mk_iMOVsd_RR(hregX86_EAX(),dst) );
            return;
         }
         case Ity_I64: {
            /* The returned value is in %edx:%eax.  Park it in the
               register-pair associated with tmp. */
            vassert(rloc.pri == RLPri_2Int);
            vassert(addToSp == 0);
            HReg dstHi, dstLo;
            lookupIRTemp64( &dstHi, &dstLo, env, d->tmp);
            addInstr(env, mk_iMOVsd_RR(hregX86_EDX(),dstHi) );
            addInstr(env, mk_iMOVsd_RR(hregX86_EAX(),dstLo) );
            return;
         }
         case Ity_V128: {
            /* The returned value is on the stack, and rloc tells us
               where.  Fish it off the stack and then move the stack
               pointer upwards to clear it, as directed by
               doHelperCall. */
            vassert(rloc.pri == RLPri_V128SpRel);
            vassert(addToSp >= 16);
            HReg      dst = lookupIRTemp(env, d->tmp);
            X86AMode* am  = X86AMode_IR(rloc.spOff, hregX86_ESP());
            addInstr(env, X86Instr_SseLdSt( True/*load*/, dst, am ));
            add_to_esp(env, addToSp);
            return;
         }
         default:
            /*NOTREACHED*/
            vassert(0);
      }
      break;
   }

   /* --------- MEM FENCE --------- */
   case Ist_MBE:
      switch (stmt->Ist.MBE.event) {
         case Imbe_Fence:
            addInstr(env, X86Instr_MFence(env->hwcaps));
            return;
         default:
            break;
      }
      break;
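   /* MFence takes hwcaps because the strongest available fence differs
      across x86 variants: with SSE2 it can be a real mfence, with only
      SSE1 an sfence plus a locked RMW, and on the baseline just a
      locked RMW such as lock addl $0,(%esp).  (Roughly speaking; the
      exact choice is made by the instruction emitter.) */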

   /* --------- ACAS --------- */
   case Ist_CAS:
      if (stmt->Ist.CAS.details->oldHi == IRTemp_INVALID) {
         /* "normal" singleton CAS */
         UChar  sz;
         IRCAS* cas = stmt->Ist.CAS.details;
         IRType ty  = typeOfIRExpr(env->type_env, cas->dataLo);
         /* get: cas->expdLo into %eax, and cas->dataLo into %ebx */
         X86AMode* am = iselIntExpr_AMode(env, cas->addr);
         HReg rDataLo = iselIntExpr_R(env, cas->dataLo);
         HReg rExpdLo = iselIntExpr_R(env, cas->expdLo);
         HReg rOldLo  = lookupIRTemp(env, cas->oldLo);
         vassert(cas->expdHi == NULL);
         vassert(cas->dataHi == NULL);
         addInstr(env, mk_iMOVsd_RR(rExpdLo, rOldLo));
         addInstr(env, mk_iMOVsd_RR(rExpdLo, hregX86_EAX()));
         addInstr(env, mk_iMOVsd_RR(rDataLo, hregX86_EBX()));
         switch (ty) {
            case Ity_I32: sz = 4; break;
            case Ity_I16: sz = 2; break;
            case Ity_I8:  sz = 1; break;
            default: goto unhandled_cas;
         }
         addInstr(env, X86Instr_ACAS(am, sz));
         addInstr(env,
                  X86Instr_CMov32(Xcc_NZ,
                                  X86RM_Reg(hregX86_EAX()), rOldLo));
         return;
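         /* The sequence just emitted boils down to (32-bit case,
            schematically):
               movl rExpdLo, rOldLo
               movl rExpdLo, %eax
               movl rDataLo, %ebx
               lock cmpxchgl %ebx, am      // X86Instr_ACAS
               cmovnz %eax, rOldLo
            If the store happened, ZF is set and rOldLo already holds
            the expected (and observed) value; otherwise cmpxchg left
            the actual memory contents in %eax and the cmov copies them
            into rOldLo.  Either way rOldLo ends up with the value seen
            in memory, as IR CAS semantics require. */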
      } else {
         /* double CAS */
         IRCAS* cas = stmt->Ist.CAS.details;
         IRType ty  = typeOfIRExpr(env->type_env, cas->dataLo);
         /* only 32-bit allowed in this case */
         /* get: cas->expdLo into %eax, and cas->dataLo into %ebx */
         /* get: cas->expdHi into %edx, and cas->dataHi into %ecx */
         X86AMode* am = iselIntExpr_AMode(env, cas->addr);
         HReg rDataHi = iselIntExpr_R(env, cas->dataHi);
         HReg rDataLo = iselIntExpr_R(env, cas->dataLo);
         HReg rExpdHi = iselIntExpr_R(env, cas->expdHi);
         HReg rExpdLo = iselIntExpr_R(env, cas->expdLo);
         HReg rOldHi  = lookupIRTemp(env, cas->oldHi);
         HReg rOldLo  = lookupIRTemp(env, cas->oldLo);
         if (ty != Ity_I32)
            goto unhandled_cas;
         addInstr(env, mk_iMOVsd_RR(rExpdHi, rOldHi));
         addInstr(env, mk_iMOVsd_RR(rExpdLo, rOldLo));
         addInstr(env, mk_iMOVsd_RR(rExpdHi, hregX86_EDX()));
         addInstr(env, mk_iMOVsd_RR(rExpdLo, hregX86_EAX()));
         addInstr(env, mk_iMOVsd_RR(rDataHi, hregX86_ECX()));
         addInstr(env, mk_iMOVsd_RR(rDataLo, hregX86_EBX()));
         addInstr(env, X86Instr_DACAS(am));
         addInstr(env,
                  X86Instr_CMov32(Xcc_NZ,
                                  X86RM_Reg(hregX86_EDX()), rOldHi));
         addInstr(env,
                  X86Instr_CMov32(Xcc_NZ,
                                  X86RM_Reg(hregX86_EAX()), rOldLo));
         return;
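         /* X86Instr_DACAS is in effect a lock cmpxchg8b am: the
            expected value goes in %edx:%eax and the new value in
            %ecx:%ebx; on mismatch the current memory contents come
            back in %edx:%eax, which the two conditional moves then
            propagate into the rOldHi:rOldLo pair. */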
      }
      unhandled_cas:
      break;

   /* --------- INSTR MARK --------- */
   /* Doesn't generate any executable code ... */
   case Ist_IMark:
       return;

   /* --------- NO-OP --------- */
   /* Fairly self-explanatory, wouldn't you say? */
   case Ist_NoOp:
       return;

   /* --------- EXIT --------- */
   case Ist_Exit: {
      if (stmt->Ist.Exit.dst->tag != Ico_U32)
         vpanic("iselStmt(x86): Ist_Exit: dst is not a 32-bit value");

      X86CondCode cc    = iselCondCode(env, stmt->Ist.Exit.guard);
      X86AMode*   amEIP = X86AMode_IR(stmt->Ist.Exit.offsIP,
                                      hregX86_EBP());

      /* Case: boring transfer to known address */
      if (stmt->Ist.Exit.jk == Ijk_Boring) {
         if (env->chainingAllowed) {
            /* .. almost always true .. */
            /* Skip the event check at the dst if this is a forwards
               edge. */
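            /* (Rationale: every translation begins with an event
               check, but the check only has to run on edges that can
               close a loop, i.e. backward edges.  A target above
               max_ga lies beyond this guest block, so the edge is
               treated as a forward edge and may use the fast entry
               point, which skips the check.  That, at least, is the
               intent of the test below.) */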
            Bool toFastEP
               = ((Addr32)stmt->Ist.Exit.dst->Ico.U32) > env->max_ga;
            if (0) vex_printf("%s", toFastEP ? "Y" : ",");
            addInstr(env, X86Instr_XDirect(stmt->Ist.Exit.dst->Ico.U32,
                                           amEIP, cc, toFastEP));
         } else {
            /* .. very occasionally .. */
            /* We can't use chaining, so ask for an assisted transfer,
               as that's the only alternative that is allowable. */
            HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst));
            addInstr(env, X86Instr_XAssisted(r, amEIP, cc, Ijk_Boring));
         }
         return;
      }

      /* Case: assisted transfer to arbitrary address */
      switch (stmt->Ist.Exit.jk) {
         /* Keep this list in sync with that in iselNext below */
         case Ijk_ClientReq:
         case Ijk_EmWarn:
         case Ijk_MapFail:
         case Ijk_NoDecode:
         case Ijk_NoRedir:
         case Ijk_SigSEGV:
         case Ijk_SigTRAP:
         case Ijk_Sys_int128:
         case Ijk_Sys_int129:
         case Ijk_Sys_int130:
         case Ijk_Sys_int145:
         case Ijk_Sys_int210:
         case Ijk_Sys_syscall:
         case Ijk_Sys_sysenter:
         case Ijk_InvalICache:
         case Ijk_Yield:
         {
            HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst));
            addInstr(env, X86Instr_XAssisted(r, amEIP, cc, stmt->Ist.Exit.jk));
            return;
         }
         default:
            break;
      }

      /* Do we ever expect to see any other kind? */
      goto stmt_fail;
   }

   default: break;
   }
  stmt_fail:
   ppIRStmt(stmt);
   vpanic("iselStmt");
}


/*---------------------------------------------------------*/
/*--- ISEL: Basic block terminators (Nexts)             ---*/
/*---------------------------------------------------------*/

static void iselNext ( ISelEnv* env,
                       IRExpr* next, IRJumpKind jk, Int offsIP )
{
   if (vex_traceflags & VEX_TRACE_VCODE) {
      vex_printf( "\n-- PUT(%d) = ", offsIP);
      ppIRExpr( next );
      vex_printf( "; exit-");
      ppIRJumpKind(jk);
      vex_printf( "\n");
   }

   /* Case: boring transfer to known address */
   if (next->tag == Iex_Const) {
      IRConst* cdst = next->Iex.Const.con;
      vassert(cdst->tag == Ico_U32);
      if (jk == Ijk_Boring || jk == Ijk_Call) {
         /* Boring transfer to known address */
         X86AMode* amEIP = X86AMode_IR(offsIP, hregX86_EBP());
         if (env->chainingAllowed) {
            /* .. almost always true .. */
            /* Skip the event check at the dst if this is a forwards
               edge. */
            Bool toFastEP
               = ((Addr32)cdst->Ico.U32) > env->max_ga;
            if (0) vex_printf("%s", toFastEP ? "X" : ".");
            addInstr(env, X86Instr_XDirect(cdst->Ico.U32,
                                           amEIP, Xcc_ALWAYS,
                                           toFastEP));
         } else {
            /* .. very occasionally .. */
            /* We can't use chaining, so ask for an assisted transfer,
               as that's the only alternative that is allowable. */
            HReg r = iselIntExpr_R(env, next);
            addInstr(env, X86Instr_XAssisted(r, amEIP, Xcc_ALWAYS,
                                             Ijk_Boring));
         }
         return;
      }
   }

   /* Case: call/return (==boring) transfer to any address */
   switch (jk) {
      case Ijk_Boring: case Ijk_Ret: case Ijk_Call: {
         HReg      r     = iselIntExpr_R(env, next);
         X86AMode* amEIP = X86AMode_IR(offsIP, hregX86_EBP());
         if (env->chainingAllowed) {
            addInstr(env, X86Instr_XIndir(r, amEIP, Xcc_ALWAYS));
         } else {
            addInstr(env, X86Instr_XAssisted(r, amEIP, Xcc_ALWAYS,
                                               Ijk_Boring));
         }
         return;
      }
      default:
         break;
   }
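   /* For these flavours, XIndir jumps via the dispatcher's fast
      translation-table lookup, while XAssisted returns to the run-time
      with an explicit IRJumpKind as the reason; the latter is also the
      only option when chaining is disallowed.  (High-level description
      only; the details live in the dispatcher.) */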

   /* Case: assisted transfer to arbitrary address */
   switch (jk) {
      /* Keep this list in sync with that for Ist_Exit above */
      case Ijk_ClientReq:
      case Ijk_EmWarn:
      case Ijk_MapFail:
      case Ijk_NoDecode:
      case Ijk_NoRedir:
      case Ijk_SigSEGV:
      case Ijk_SigTRAP:
      case Ijk_Sys_int128:
      case Ijk_Sys_int129:
      case Ijk_Sys_int130:
      case Ijk_Sys_int145:
      case Ijk_Sys_int210:
      case Ijk_Sys_syscall:
      case Ijk_Sys_sysenter:
      case Ijk_InvalICache:
      case Ijk_Yield:
      {
         HReg      r     = iselIntExpr_R(env, next);
         X86AMode* amEIP = X86AMode_IR(offsIP, hregX86_EBP());
         addInstr(env, X86Instr_XAssisted(r, amEIP, Xcc_ALWAYS, jk));
         return;
      }
      default:
         break;
   }

   vex_printf( "\n-- PUT(%d) = ", offsIP);
   ppIRExpr( next );
   vex_printf( "; exit-");
   ppIRJumpKind(jk);
   vex_printf( "\n");
   vassert(0); // are we expecting any other kind?
}


/*---------------------------------------------------------*/
/*--- Insn selector top-level                           ---*/
/*---------------------------------------------------------*/

/* Translate an entire SB to x86 code. */

HInstrArray* iselSB_X86 ( const IRSB* bb,
                          VexArch      arch_host,
                          const VexArchInfo* archinfo_host,
                          const VexAbiInfo*  vbi/*UNUSED*/,
                          Int offs_Host_EvC_Counter,
                          Int offs_Host_EvC_FailAddr,
                          Bool chainingAllowed,
                          Bool addProfInc,
                          Addr max_ga )
{
   Int      i, j;
   HReg     hreg, hregHI;
   ISelEnv* env;
   UInt     hwcaps_host = archinfo_host->hwcaps;
   X86AMode *amCounter, *amFailAddr;

   /* sanity ... */
   vassert(arch_host == VexArchX86);
   vassert(0 == (hwcaps_host
                 & ~(VEX_HWCAPS_X86_MMXEXT
                     | VEX_HWCAPS_X86_SSE1
                     | VEX_HWCAPS_X86_SSE2
                     | VEX_HWCAPS_X86_SSE3
                     | VEX_HWCAPS_X86_LZCNT)));

   /* Check that the host's endianness is as expected. */
   vassert(archinfo_host->endness == VexEndnessLE);

   /* Make up an initial environment to use. */
   env = LibVEX_Alloc_inline(sizeof(ISelEnv));
   env->vreg_ctr = 0;

   /* Set up output code array. */
   env->code = newHInstrArray();

   /* Copy BB's type env. */
   env->type_env = bb->tyenv;

   /* Make up an IRTemp -> virtual HReg mapping.  This doesn't
      change as we go along. */
   env->n_vregmap = bb->tyenv->types_used;
   env->vregmap   = LibVEX_Alloc_inline(env->n_vregmap * sizeof(HReg));
   env->vregmapHI = LibVEX_Alloc_inline(env->n_vregmap * sizeof(HReg));

   /* and finally ... */
   env->chainingAllowed = chainingAllowed;
   env->hwcaps          = hwcaps_host;
   env->max_ga          = max_ga;

   /* For each IR temporary, allocate a suitably-kinded virtual
      register. */
   j = 0;
   for (i = 0; i < env->n_vregmap; i++) {
      hregHI = hreg = INVALID_HREG;
      switch (bb->tyenv->types[i]) {
         case Ity_I1:
         case Ity_I8:
         case Ity_I16:
         case Ity_I32:  hreg   = mkHReg(True, HRcInt32,  0, j++); break;
         case Ity_I64:  hreg   = mkHReg(True, HRcInt32,  0, j++);
                        hregHI = mkHReg(True, HRcInt32,  0, j++); break;
         case Ity_F32:
         case Ity_F64:  hreg   = mkHReg(True, HRcFlt64,  0, j++); break;
         case Ity_V128: hreg   = mkHReg(True, HRcVec128, 0, j++); break;
         default: ppIRType(bb->tyenv->types[i]);
                  vpanic("iselBB: IRTemp type");
      }
      env->vregmap[i]   = hreg;
      env->vregmapHI[i] = hregHI;
   }
   env->vreg_ctr = j;
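   /* Note that an Ity_I64 temp consumes two vreg numbers: one half in
      vregmap[] and the other in vregmapHI[], matching what
      lookupIRTemp64 hands back in iselStmt above. */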

   /* The very first instruction must be an event check. */
   amCounter  = X86AMode_IR(offs_Host_EvC_Counter,  hregX86_EBP());
   amFailAddr = X86AMode_IR(offs_Host_EvC_FailAddr, hregX86_EBP());
   addInstr(env, X86Instr_EvCheck(amCounter, amFailAddr));
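   /* Roughly, EvCheck decrements the counter held at amCounter and, if
      it goes negative, jumps to the code address stored at amFailAddr;
      both live at fixed offsets from the guest state pointer in %ebp.
      (Sketch only; the exact sequence is produced by the emitter.) */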

   /* Possibly a block counter increment (for profiling).  At this
      point we don't know the address of the counter, so just pretend
      it is zero.  It will have to be patched later, but before this
      translation is used, by a call to LibVEX_PatchProfInc. */
   if (addProfInc) {
      addInstr(env, X86Instr_ProfInc());
   }

   /* Ok, finally we can iterate over the statements. */
   for (i = 0; i < bb->stmts_used; i++)
      iselStmt(env, bb->stmts[i]);

   iselNext(env, bb->next, bb->jumpkind, bb->offsIP);

   /* record the number of vregs we used. */
   env->code->n_vregs = env->vreg_ctr;
   return env->code;
}


/*---------------------------------------------------------------*/
/*--- end                                     host_x86_isel.c ---*/
/*---------------------------------------------------------------*/