1
2 /*---------------------------------------------------------------*/
3 /*--- begin host_x86_isel.c ---*/
4 /*---------------------------------------------------------------*/
5
6 /*
7 This file is part of Valgrind, a dynamic binary instrumentation
8 framework.
9
10 Copyright (C) 2004-2015 OpenWorks LLP
11 info@open-works.net
12
13 This program is free software; you can redistribute it and/or
14 modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2 of the
16 License, or (at your option) any later version.
17
18 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
22
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, write to the Free Software
25 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
26 02110-1301, USA.
27
28 The GNU General Public License is contained in the file COPYING.
29
30 Neither the names of the U.S. Department of Energy nor the
31 University of California nor the names of its contributors may be
32 used to endorse or promote products derived from this software
33 without prior written permission.
34 */
35
36 #include "libvex_basictypes.h"
37 #include "libvex_ir.h"
38 #include "libvex.h"
39
40 #include "ir_match.h"
41 #include "main_util.h"
42 #include "main_globals.h"
43 #include "host_generic_regs.h"
44 #include "host_generic_simd64.h"
45 #include "host_generic_simd128.h"
46 #include "host_x86_defs.h"
47
48 /* TODO 21 Apr 2005:
49
50 -- (Really an assembler issue) don't emit CMov32 as a cmov
51 insn, since that's expensive on P4 and conditional branch
52 is cheaper if (as we expect) the condition is highly predictable
53
54 -- preserve xmm registers across function calls (by declaring them
55 as trashed by call insns)
56
57 -- preserve x87 ST stack discipline across function calls. Sigh.
58
59 -- Check doHelperCall: if a call is conditional, we cannot safely
60 compute any regparm args directly to registers. Hence, the
61 fast-regparm marshalling should be restricted to unconditional
62 calls only.
63 */
64
65 /*---------------------------------------------------------*/
66 /*--- x87 control word stuff ---*/
67 /*---------------------------------------------------------*/
68
69 /* Vex-generated code expects to run with the FPU set as follows: all
70 exceptions masked, round-to-nearest, precision = 53 bits. This
71 corresponds to a FPU control word value of 0x027F.
72
73 Similarly the SSE control word (%mxcsr) should be 0x1F80.
74
75 %fpucw and %mxcsr should have these values on entry to
76 Vex-generated code, and those values should be
77 unchanged at exit.
78 */
79
80 #define DEFAULT_FPUCW 0x027F
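/* Bit-level note (not in the original comment): 0x027F sets bits 0..5
   (all x87 exceptions masked), leaves the reserved bit 6 set, selects
   precision control 10b (53-bit mantissa) in bits 8..9 and rounding
   control 00b (round to nearest) in bits 10..11.  The matching %mxcsr
   value 0x1F80 likewise masks all SSE exceptions and selects
   round-to-nearest. */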
81
82 /* debugging only, do not use */
83 /* define DEFAULT_FPUCW 0x037F */
84
85
86 /*---------------------------------------------------------*/
87 /*--- misc helpers ---*/
88 /*---------------------------------------------------------*/
89
90 /* These are duplicated in guest-x86/toIR.c */
91 static IRExpr* unop ( IROp op, IRExpr* a )
92 {
93 return IRExpr_Unop(op, a);
94 }
95
96 static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
97 {
98 return IRExpr_Binop(op, a1, a2);
99 }
100
101 static IRExpr* bind ( Int binder )
102 {
103 return IRExpr_Binder(binder);
104 }
105
106 static Bool isZeroU8 ( IRExpr* e )
107 {
108 return e->tag == Iex_Const
109 && e->Iex.Const.con->tag == Ico_U8
110 && e->Iex.Const.con->Ico.U8 == 0;
111 }
112
113 static Bool isZeroU32 ( IRExpr* e )
114 {
115 return e->tag == Iex_Const
116 && e->Iex.Const.con->tag == Ico_U32
117 && e->Iex.Const.con->Ico.U32 == 0;
118 }
119
120 //static Bool isZeroU64 ( IRExpr* e )
121 //{
122 // return e->tag == Iex_Const
123 // && e->Iex.Const.con->tag == Ico_U64
124 // && e->Iex.Const.con->Ico.U64 == 0ULL;
125 //}
126
127
128 /*---------------------------------------------------------*/
129 /*--- ISelEnv ---*/
130 /*---------------------------------------------------------*/
131
132 /* This carries around:
133
134 - A mapping from IRTemp to IRType, giving the type of any IRTemp we
135 might encounter. This is computed before insn selection starts,
136 and does not change.
137
138 - A mapping from IRTemp to HReg. This tells the insn selector
139 which virtual register(s) are associated with each IRTemp
140 temporary. This is computed before insn selection starts, and
141 does not change. We expect this mapping to map precisely the
142 same set of IRTemps as the type mapping does.
143
144 - vregmap holds the primary register for the IRTemp.
145 - vregmapHI is only used for 64-bit integer-typed
146 IRTemps. It holds the identity of a second
147 32-bit virtual HReg, which holds the high half
148 of the value.
149
150 - The code array, that is, the insns selected so far.
151
152 - A counter, for generating new virtual registers.
153
154 - The host subarchitecture we are selecting insns for.
155 This is set at the start and does not change.
156
157 - A Bool for indicating whether we may generate chain-me
158 instructions for control flow transfers, or whether we must use
159 XAssisted.
160
161 - The maximum guest address of any guest insn in this block.
162 Actually, the address of the highest-addressed byte from any insn
163 in this block. It is set at the start and does not change. This is
164 used for detecting jumps which are definitely forward-edges from
165 this block, and therefore can be made (chained) to the fast entry
166 point of the destination, thereby avoiding the destination's
167 event check.
168
169 Note, this is all (well, mostly) host-independent.
170 */
171
172 typedef
173 struct {
174 /* Constant -- set at the start and do not change. */
175 IRTypeEnv* type_env;
176
177 HReg* vregmap;
178 HReg* vregmapHI;
179 Int n_vregmap;
180
181 UInt hwcaps;
182
183 Bool chainingAllowed;
184 Addr32 max_ga;
185
186 /* These are modified as we go along. */
187 HInstrArray* code;
188 Int vreg_ctr;
189 }
190 ISelEnv;
191
192
193 static HReg lookupIRTemp ( ISelEnv* env, IRTemp tmp )
194 {
195 vassert(tmp >= 0);
196 vassert(tmp < env->n_vregmap);
197 return env->vregmap[tmp];
198 }
199
200 static void lookupIRTemp64 ( HReg* vrHI, HReg* vrLO, ISelEnv* env, IRTemp tmp )
201 {
202 vassert(tmp >= 0);
203 vassert(tmp < env->n_vregmap);
204 vassert(! hregIsInvalid(env->vregmapHI[tmp]));
205 *vrLO = env->vregmap[tmp];
206 *vrHI = env->vregmapHI[tmp];
207 }
208
209 static void addInstr ( ISelEnv* env, X86Instr* instr )
210 {
211 addHInstr(env->code, instr);
212 if (vex_traceflags & VEX_TRACE_VCODE) {
213 ppX86Instr(instr, False);
214 vex_printf("\n");
215 }
216 }
217
218 static HReg newVRegI ( ISelEnv* env )
219 {
220 HReg reg = mkHReg(True/*virtual reg*/, HRcInt32, 0/*enc*/, env->vreg_ctr);
221 env->vreg_ctr++;
222 return reg;
223 }
224
225 static HReg newVRegF ( ISelEnv* env )
226 {
227 HReg reg = mkHReg(True/*virtual reg*/, HRcFlt64, 0/*enc*/, env->vreg_ctr);
228 env->vreg_ctr++;
229 return reg;
230 }
231
232 static HReg newVRegV ( ISelEnv* env )
233 {
234 HReg reg = mkHReg(True/*virtual reg*/, HRcVec128, 0/*enc*/, env->vreg_ctr);
235 env->vreg_ctr++;
236 return reg;
237 }
238
239
240 /*---------------------------------------------------------*/
241 /*--- ISEL: Forward declarations ---*/
242 /*---------------------------------------------------------*/
243
244 /* These are organised as iselXXX and iselXXX_wrk pairs. The
245 iselXXX_wrk do the real work, but are not to be called directly.
246 For each XXX, iselXXX calls its iselXXX_wrk counterpart, then
247 checks that all returned registers are virtual. You should not
248 call the _wrk version directly.
249 */
250 static X86RMI* iselIntExpr_RMI_wrk ( ISelEnv* env, IRExpr* e );
251 static X86RMI* iselIntExpr_RMI ( ISelEnv* env, IRExpr* e );
252
253 static X86RI* iselIntExpr_RI_wrk ( ISelEnv* env, IRExpr* e );
254 static X86RI* iselIntExpr_RI ( ISelEnv* env, IRExpr* e );
255
256 static X86RM* iselIntExpr_RM_wrk ( ISelEnv* env, IRExpr* e );
257 static X86RM* iselIntExpr_RM ( ISelEnv* env, IRExpr* e );
258
259 static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e );
260 static HReg iselIntExpr_R ( ISelEnv* env, IRExpr* e );
261
262 static X86AMode* iselIntExpr_AMode_wrk ( ISelEnv* env, IRExpr* e );
263 static X86AMode* iselIntExpr_AMode ( ISelEnv* env, IRExpr* e );
264
265 static void iselInt64Expr_wrk ( HReg* rHi, HReg* rLo,
266 ISelEnv* env, IRExpr* e );
267 static void iselInt64Expr ( HReg* rHi, HReg* rLo,
268 ISelEnv* env, IRExpr* e );
269
270 static X86CondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e );
271 static X86CondCode iselCondCode ( ISelEnv* env, IRExpr* e );
272
273 static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e );
274 static HReg iselDblExpr ( ISelEnv* env, IRExpr* e );
275
276 static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e );
277 static HReg iselFltExpr ( ISelEnv* env, IRExpr* e );
278
279 static HReg iselVecExpr_wrk ( ISelEnv* env, IRExpr* e );
280 static HReg iselVecExpr ( ISelEnv* env, IRExpr* e );
281
282
283 /*---------------------------------------------------------*/
284 /*--- ISEL: Misc helpers ---*/
285 /*---------------------------------------------------------*/
286
287 /* Make an int reg-reg move. */
288
289 static X86Instr* mk_iMOVsd_RR ( HReg src, HReg dst )
290 {
291 vassert(hregClass(src) == HRcInt32);
292 vassert(hregClass(dst) == HRcInt32);
293 return X86Instr_Alu32R(Xalu_MOV, X86RMI_Reg(src), dst);
294 }
295
296
297 /* Make a vector reg-reg move. */
298
299 static X86Instr* mk_vMOVsd_RR ( HReg src, HReg dst )
300 {
301 vassert(hregClass(src) == HRcVec128);
302 vassert(hregClass(dst) == HRcVec128);
303 return X86Instr_SseReRg(Xsse_MOV, src, dst);
304 }
305
306 /* Advance/retreat %esp by n. */
307
308 static void add_to_esp ( ISelEnv* env, Int n )
309 {
310 vassert(n > 0 && n < 256 && (n%4) == 0);
311 addInstr(env,
312 X86Instr_Alu32R(Xalu_ADD, X86RMI_Imm(n), hregX86_ESP()));
313 }
314
315 static void sub_from_esp ( ISelEnv* env, Int n )
316 {
317 vassert(n > 0 && n < 256 && (n%4) == 0);
318 addInstr(env,
319 X86Instr_Alu32R(Xalu_SUB, X86RMI_Imm(n), hregX86_ESP()));
320 }
321
322
323 /* Given an amode, return one which references 4 bytes further
324 along. */
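/* For example, given 0(%esp) this yields 4(%esp).  It is typically used
   when a 64-bit value lives in two adjacent 32-bit slots: the original
   amode addresses the low word and the advanced one the high word. */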
325
326 static X86AMode* advance4 ( X86AMode* am )
327 {
328 X86AMode* am4 = dopyX86AMode(am);
329 switch (am4->tag) {
330 case Xam_IRRS:
331 am4->Xam.IRRS.imm += 4; break;
332 case Xam_IR:
333 am4->Xam.IR.imm += 4; break;
334 default:
335 vpanic("advance4(x86,host)");
336 }
337 return am4;
338 }
339
340
341 /* Push an arg onto the host stack, in preparation for a call to a
342 helper function of some kind. Returns the number of 32-bit words
343 pushed. If we encounter an IRExpr_VECRET() then we expect that
344 r_vecRetAddr will be a valid register, that holds the relevant
345 address.
346 */
347 static Int pushArg ( ISelEnv* env, IRExpr* arg, HReg r_vecRetAddr )
348 {
349 if (UNLIKELY(arg->tag == Iex_VECRET)) {
350 vassert(0); //ATC
351 vassert(!hregIsInvalid(r_vecRetAddr));
352 addInstr(env, X86Instr_Push(X86RMI_Reg(r_vecRetAddr)));
353 return 1;
354 }
355 if (UNLIKELY(arg->tag == Iex_BBPTR)) {
356 addInstr(env, X86Instr_Push(X86RMI_Reg(hregX86_EBP())));
357 return 1;
358 }
359 /* Else it's a "normal" expression. */
360 IRType arg_ty = typeOfIRExpr(env->type_env, arg);
361 if (arg_ty == Ity_I32) {
362 addInstr(env, X86Instr_Push(iselIntExpr_RMI(env, arg)));
363 return 1;
364 } else
365 if (arg_ty == Ity_I64) {
366 HReg rHi, rLo;
367 iselInt64Expr(&rHi, &rLo, env, arg);
368 addInstr(env, X86Instr_Push(X86RMI_Reg(rHi)));
369 addInstr(env, X86Instr_Push(X86RMI_Reg(rLo)));
370 return 2;
371 }
372 ppIRExpr(arg);
373 vpanic("pushArg(x86): can't handle arg of this type");
374 }
375
376
377 /* Complete the call to a helper function, by calling the
378 helper and clearing the args off the stack. */
379
380 static
381 void callHelperAndClearArgs ( ISelEnv* env, X86CondCode cc,
382 IRCallee* cee, Int n_arg_ws,
383 RetLoc rloc )
384 {
385 /* Complication. Need to decide which reg to use as the fn address
386 pointer, in a way that doesn't trash regparm-passed
387 parameters. */
388 vassert(sizeof(void*) == 4);
389
390 addInstr(env, X86Instr_Call( cc, (Addr)cee->addr,
391 cee->regparms, rloc));
392 if (n_arg_ws > 0)
393 add_to_esp(env, 4*n_arg_ws);
394 }
395
396
397 /* Used only in doHelperCall. See big comment in doHelperCall re
398 handling of regparm args. This function figures out whether
399 evaluation of an expression might require use of a fixed register.
400 If in doubt return True (safe but suboptimal).
401 */
402 static
403 Bool mightRequireFixedRegs ( IRExpr* e )
404 {
405 if (UNLIKELY(is_IRExpr_VECRET_or_BBPTR(e))) {
406 // These are always "safe" -- either a copy of %esp in some
407 // arbitrary vreg, or a copy of %ebp, respectively.
408 return False;
409 }
410 /* Else it's a "normal" expression. */
411 switch (e->tag) {
412 case Iex_RdTmp: case Iex_Const: case Iex_Get:
413 return False;
414 default:
415 return True;
416 }
417 }
418
419
420 /* Do a complete function call. |guard| is a Ity_Bit expression
421 indicating whether or not the call happens. If guard==NULL, the
422 call is unconditional. |retloc| is set to indicate where the
423 return value is after the call. The caller (of this fn) must
424 generate code to add |stackAdjustAfterCall| to the stack pointer
425 after the call is done. */
426
427 static
428 void doHelperCall ( /*OUT*/UInt* stackAdjustAfterCall,
429 /*OUT*/RetLoc* retloc,
430 ISelEnv* env,
431 IRExpr* guard,
432 IRCallee* cee, IRType retTy, IRExpr** args )
433 {
434 X86CondCode cc;
435 HReg argregs[3];
436 HReg tmpregs[3];
437 Bool danger;
438 Int not_done_yet, n_args, n_arg_ws, stack_limit,
439 i, argreg, argregX;
440
441 /* Set default returns. We'll update them later if needed. */
442 *stackAdjustAfterCall = 0;
443 *retloc = mk_RetLoc_INVALID();
444
445 /* These are used for cross-checking that IR-level constraints on
446 the use of Iex_VECRET and Iex_BBPTR are observed. */
447 UInt nVECRETs = 0;
448 UInt nBBPTRs = 0;
449
450 /* Marshal args for a call, do the call, and clear the stack.
451 Complexities to consider:
452
453 * The return type can be I{64,32,16,8} or V128. In the V128
454 case, it is expected that |args| will contain the special
455 node IRExpr_VECRET(), in which case this routine generates
456 code to allocate space on the stack for the vector return
457 value. Since we are not passing any scalars on the stack, it
458 is enough to preallocate the return space before marshalling
459 any arguments, in this case.
460
461 |args| may also contain IRExpr_BBPTR(), in which case the
462 value in %ebp is passed as the corresponding argument.
463
464 * If the callee claims regparmness of 1, 2 or 3, we must pass the
465 first 1, 2 or 3 args in registers (EAX, EDX, and ECX
466 respectively). To keep things relatively simple, only args of
467 type I32 may be passed as regparms -- just bomb out if anything
468 else turns up. Clearly this depends on the front ends not
469 trying to pass any other types as regparms.
470 */
471
472 /* 16 Nov 2004: the regparm handling is complicated by the
473 following problem.
474
475 Consider a call to a function with two regparm parameters:
476 f(e1,e2). We need to compute e1 into %eax and e2 into %edx.
477 Suppose code is first generated to compute e1 into %eax. Then,
478 code is generated to compute e2 into %edx. Unfortunately, if
479 the latter code sequence uses %eax, it will trash the value of
480 e1 computed by the former sequence. This could happen if (for
481 example) e2 itself involved a function call. In the code below,
482 args are evaluated right-to-left, not left-to-right, but the
483 principle and the problem are the same.
484
485 One solution is to compute all regparm-bound args into vregs
486 first, and once they are all done, move them to the relevant
487 real regs. This always gives correct code, but it also gives
488 a bunch of vreg-to-rreg moves which are usually redundant but
489 are hard for the register allocator to get rid of.
490
491 A compromise is to first examine all regparm'd argument
492 expressions. If they are all so simple that it is clear
493 they will be evaluated without use of any fixed registers,
494 use the old compute-directly-to-fixed-target scheme. If not,
495 be safe and use the via-vregs scheme.
496
497 Note this requires being able to examine an expression and
498 determine whether or not evaluation of it might use a fixed
499 register. That requires knowledge of how the rest of this
500 insn selector works. Currently just the following 3 are
501 regarded as safe -- hopefully they cover the majority of
502 arguments in practice: IRExpr_Tmp IRExpr_Const IRExpr_Get.
503 */
504 vassert(cee->regparms >= 0 && cee->regparms <= 3);
505
506 /* Count the number of args and also the VECRETs */
507 n_args = n_arg_ws = 0;
508 while (args[n_args]) {
509 IRExpr* arg = args[n_args];
510 n_args++;
511 if (UNLIKELY(arg->tag == Iex_VECRET)) {
512 nVECRETs++;
513 } else if (UNLIKELY(arg->tag == Iex_BBPTR)) {
514 nBBPTRs++;
515 }
516 }
517
518 /* If this fails, the IR is ill-formed */
519 vassert(nBBPTRs == 0 || nBBPTRs == 1);
520
521 /* If we have a VECRET, allocate space on the stack for the return
522 value, and record the stack pointer after that. */
523 HReg r_vecRetAddr = INVALID_HREG;
524 if (nVECRETs == 1) {
525 vassert(retTy == Ity_V128 || retTy == Ity_V256);
526 vassert(retTy != Ity_V256); // we don't handle that yet (if ever)
527 r_vecRetAddr = newVRegI(env);
528 sub_from_esp(env, 16);
529 addInstr(env, mk_iMOVsd_RR( hregX86_ESP(), r_vecRetAddr ));
530 } else {
531 // If either of these fail, the IR is ill-formed
532 vassert(retTy != Ity_V128 && retTy != Ity_V256);
533 vassert(nVECRETs == 0);
534 }
535
536 not_done_yet = n_args;
537
538 stack_limit = cee->regparms;
539
540 /* ------ BEGIN marshall all arguments ------ */
541
542 /* Push (R to L) the stack-passed args, [n_args-1 .. stack_limit] */
543 for (i = n_args-1; i >= stack_limit; i--) {
544 n_arg_ws += pushArg(env, args[i], r_vecRetAddr);
545 not_done_yet--;
546 }
547
548 /* args [stack_limit-1 .. 0] and possibly %ebp are to be passed in
549 registers. */
550
551 if (cee->regparms > 0) {
552
553 /* ------ BEGIN deal with regparms ------ */
554
555 /* deal with regparms, not forgetting %ebp if needed. */
556 argregs[0] = hregX86_EAX();
557 argregs[1] = hregX86_EDX();
558 argregs[2] = hregX86_ECX();
559 tmpregs[0] = tmpregs[1] = tmpregs[2] = INVALID_HREG;
560
561 argreg = cee->regparms;
562
563 /* In keeping with big comment above, detect potential danger
564 and use the via-vregs scheme if needed. */
565 danger = False;
566 for (i = stack_limit-1; i >= 0; i--) {
567 if (mightRequireFixedRegs(args[i])) {
568 danger = True;
569 break;
570 }
571 }
572
573 if (danger) {
574
575 /* Move via temporaries */
576 argregX = argreg;
577 for (i = stack_limit-1; i >= 0; i--) {
578
579 if (0) {
580 vex_printf("x86 host: register param is complex: ");
581 ppIRExpr(args[i]);
582 vex_printf("\n");
583 }
584
585 IRExpr* arg = args[i];
586 argreg--;
587 vassert(argreg >= 0);
588 if (UNLIKELY(arg->tag == Iex_VECRET)) {
589 vassert(0); //ATC
590 }
591 else if (UNLIKELY(arg->tag == Iex_BBPTR)) {
592 vassert(0); //ATC
593 } else {
594 vassert(typeOfIRExpr(env->type_env, arg) == Ity_I32);
595 tmpregs[argreg] = iselIntExpr_R(env, arg);
596 }
597 not_done_yet--;
598 }
599 for (i = stack_limit-1; i >= 0; i--) {
600 argregX--;
601 vassert(argregX >= 0);
602 addInstr( env, mk_iMOVsd_RR( tmpregs[argregX], argregs[argregX] ) );
603 }
604
605 } else {
606 /* It's safe to compute all regparm args directly into their
607 target registers. */
608 for (i = stack_limit-1; i >= 0; i--) {
609 IRExpr* arg = args[i];
610 argreg--;
611 vassert(argreg >= 0);
612 if (UNLIKELY(arg->tag == Iex_VECRET)) {
613 vassert(!hregIsInvalid(r_vecRetAddr));
614 addInstr(env, X86Instr_Alu32R(Xalu_MOV,
615 X86RMI_Reg(r_vecRetAddr),
616 argregs[argreg]));
617 }
618 else if (UNLIKELY(arg->tag == Iex_BBPTR)) {
619 vassert(0); //ATC
620 } else {
621 vassert(typeOfIRExpr(env->type_env, arg) == Ity_I32);
622 addInstr(env, X86Instr_Alu32R(Xalu_MOV,
623 iselIntExpr_RMI(env, arg),
624 argregs[argreg]));
625 }
626 not_done_yet--;
627 }
628
629 }
630
631 /* ------ END deal with regparms ------ */
632
633 }
634
635 vassert(not_done_yet == 0);
636
637 /* ------ END marshall all arguments ------ */
638
639 /* Now we can compute the condition. We can't do it earlier
640 because the argument computations could trash the condition
641 codes. Be a bit clever to handle the common case where the
642 guard is 1:Bit. */
643 cc = Xcc_ALWAYS;
644 if (guard) {
645 if (guard->tag == Iex_Const
646 && guard->Iex.Const.con->tag == Ico_U1
647 && guard->Iex.Const.con->Ico.U1 == True) {
648 /* unconditional -- do nothing */
649 } else {
650 cc = iselCondCode( env, guard );
651 }
652 }
653
654 /* Do final checks, set the return values, and generate the call
655 instruction proper. */
656 vassert(*stackAdjustAfterCall == 0);
657 vassert(is_RetLoc_INVALID(*retloc));
658 switch (retTy) {
659 case Ity_INVALID:
660 /* Function doesn't return a value. */
661 *retloc = mk_RetLoc_simple(RLPri_None);
662 break;
663 case Ity_I64:
664 *retloc = mk_RetLoc_simple(RLPri_2Int);
665 break;
666 case Ity_I32: case Ity_I16: case Ity_I8:
667 *retloc = mk_RetLoc_simple(RLPri_Int);
668 break;
669 case Ity_V128:
670 *retloc = mk_RetLoc_spRel(RLPri_V128SpRel, 0);
671 *stackAdjustAfterCall = 16;
672 break;
673 case Ity_V256:
674 vassert(0); // ATC
675 *retloc = mk_RetLoc_spRel(RLPri_V256SpRel, 0);
676 *stackAdjustAfterCall = 32;
677 break;
678 default:
679 /* IR can denote other possible return types, but we don't
680 handle those here. */
681 vassert(0);
682 }
683
684 /* Finally, generate the call itself. This needs the *retloc value
685 set in the switch above, which is why it's at the end. */
686 callHelperAndClearArgs( env, cc, cee, n_arg_ws, *retloc );
687 }
688
689
690 /* Given a guest-state array descriptor, an index expression and a
691 bias, generate an X86AMode holding the relevant guest state
692 offset. */
693
694 static
695 X86AMode* genGuestArrayOffset ( ISelEnv* env, IRRegArray* descr,
696 IRExpr* off, Int bias )
697 {
698 HReg tmp, roff;
699 Int elemSz = sizeofIRType(descr->elemTy);
700 Int nElems = descr->nElems;
701 Int shift = 0;
702
703 /* throw out any cases not generated by an x86 front end. In
704 theory there might be a day where we need to handle them -- if
705 we ever run non-x86-guest on x86 host. */
706
707 if (nElems != 8)
708 vpanic("genGuestArrayOffset(x86 host)(1)");
709
710 switch (elemSz) {
711 case 1: shift = 0; break;
712 case 4: shift = 2; break;
713 case 8: shift = 3; break;
714 default: vpanic("genGuestArrayOffset(x86 host)(2)");
715 }
716
717 /* Compute off into a reg, %off. Then return:
718
719 movl %off, %tmp
720 addl $bias, %tmp (if bias != 0)
721 andl $7, %tmp
722 ... base(%ebp, %tmp, shift) ...
723 */
724 tmp = newVRegI(env);
725 roff = iselIntExpr_R(env, off);
726 addInstr(env, mk_iMOVsd_RR(roff, tmp));
727 if (bias != 0) {
728 addInstr(env,
729 X86Instr_Alu32R(Xalu_ADD, X86RMI_Imm(bias), tmp));
730 }
731 addInstr(env,
732 X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(7), tmp));
733 return
734 X86AMode_IRRS( descr->base, hregX86_EBP(), tmp, shift );
735 }
736
737
738 /* Mess with the FPU's rounding mode: set to the default rounding mode
739 (DEFAULT_FPUCW). */
740 static
741 void set_FPU_rounding_default ( ISelEnv* env )
742 {
743 /* pushl $DEFAULT_FPUCW
744 fldcw 0(%esp)
745 addl $4, %esp
746 */
747 X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
748 addInstr(env, X86Instr_Push(X86RMI_Imm(DEFAULT_FPUCW)));
749 addInstr(env, X86Instr_FpLdCW(zero_esp));
750 add_to_esp(env, 4);
751 }
752
753
754 /* Mess with the FPU's rounding mode: 'mode' is an I32-typed
755 expression denoting a value in the range 0 .. 3, indicating a round
756 mode encoded as per type IRRoundingMode. Set the x87 FPU to have
757 the same rounding.
758 */
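/* Note: the IRRoundingMode encoding (0 = nearest, 1 = -infinity,
   2 = +infinity, 3 = toward zero) happens to coincide with the x87 RC
   field encoding, so the value only needs to be shifted into bits
   10..11 of the control word -- hence the shift by 10 below. */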
759 static
760 void set_FPU_rounding_mode ( ISelEnv* env, IRExpr* mode )
761 {
762 HReg rrm = iselIntExpr_R(env, mode);
763 HReg rrm2 = newVRegI(env);
764 X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
765
766 /* movl %rrm, %rrm2
767 andl $3, %rrm2 -- shouldn't be needed; paranoia
768 shll $10, %rrm2
769 orl $DEFAULT_FPUCW, %rrm2
770 pushl %rrm2
771 fldcw 0(%esp)
772 addl $4, %esp
773 */
774 addInstr(env, mk_iMOVsd_RR(rrm, rrm2));
775 addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(3), rrm2));
776 addInstr(env, X86Instr_Sh32(Xsh_SHL, 10, rrm2));
777 addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Imm(DEFAULT_FPUCW), rrm2));
778 addInstr(env, X86Instr_Push(X86RMI_Reg(rrm2)));
779 addInstr(env, X86Instr_FpLdCW(zero_esp));
780 add_to_esp(env, 4);
781 }
782
783
784 /* Generate !src into a new vector register, and be sure that the code
785 is SSE1 compatible. Amazing that Intel doesn't offer a less crappy
786 way to do this.
787 */
788 static HReg do_sse_Not128 ( ISelEnv* env, HReg src )
789 {
790 HReg dst = newVRegV(env);
791 /* Set dst to zero. If dst contains a NaN then all hell might
792 break loose after the comparison. So, first zero it. */
793 addInstr(env, X86Instr_SseReRg(Xsse_XOR, dst, dst));
794 /* And now make it all 1s ... */
795 addInstr(env, X86Instr_Sse32Fx4(Xsse_CMPEQF, dst, dst));
796 /* Finally, xor 'src' into it. */
797 addInstr(env, X86Instr_SseReRg(Xsse_XOR, src, dst));
798 /* Doesn't that just totally suck? */
799 return dst;
800 }
801
802
803 /* Round an x87 FPU value to 53-bit-mantissa precision, to be used
804 after most non-simple FPU operations (simple = +, -, *, / and
805 sqrt).
806
807 This could be done a lot more efficiently if needed, by loading
808 zero and adding it to the value to be rounded (fldz ; faddp?).
809 */
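/* The store narrows the 80-bit internal value to a 64-bit IEEE754
   double in memory; reloading it leaves |reg| holding a value that is
   exactly representable as a double, i.e. correctly rounded to 53-bit
   precision. */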
810 static void roundToF64 ( ISelEnv* env, HReg reg )
811 {
812 X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
813 sub_from_esp(env, 8);
814 addInstr(env, X86Instr_FpLdSt(False/*store*/, 8, reg, zero_esp));
815 addInstr(env, X86Instr_FpLdSt(True/*load*/, 8, reg, zero_esp));
816 add_to_esp(env, 8);
817 }
818
819
820 /*---------------------------------------------------------*/
821 /*--- ISEL: Integer expressions (32/16/8 bit) ---*/
822 /*---------------------------------------------------------*/
823
824 /* Select insns for an integer-typed expression, and add them to the
825 code list. Return a reg holding the result. This reg will be a
826 virtual register. THE RETURNED REG MUST NOT BE MODIFIED. If you
827 want to modify it, ask for a new vreg, copy it in there, and modify
828 the copy. The register allocator will do its best to map both
829 vregs to the same real register, so the copies will often disappear
830 later in the game.
831
832 This should handle expressions of 32, 16 and 8-bit type. All
833 results are returned in a 32-bit register. For 16- and 8-bit
834 expressions, the upper 16/24 bits are arbitrary, so you should mask
835 or sign extend partial values if necessary.
836 */
837
838 static HReg iselIntExpr_R ( ISelEnv* env, IRExpr* e )
839 {
840 HReg r = iselIntExpr_R_wrk(env, e);
841 /* sanity checks ... */
842 # if 0
843 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
844 # endif
845 vassert(hregClass(r) == HRcInt32);
846 vassert(hregIsVirtual(r));
847 return r;
848 }
849
850 /* DO NOT CALL THIS DIRECTLY ! */
851 static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e )
852 {
853 MatchInfo mi;
854
855 IRType ty = typeOfIRExpr(env->type_env,e);
856 vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
857
858 switch (e->tag) {
859
860 /* --------- TEMP --------- */
861 case Iex_RdTmp: {
862 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
863 }
864
865 /* --------- LOAD --------- */
866 case Iex_Load: {
867 HReg dst = newVRegI(env);
868 X86AMode* amode = iselIntExpr_AMode ( env, e->Iex.Load.addr );
869
870 /* We can't handle big-endian loads, nor load-linked. */
871 if (e->Iex.Load.end != Iend_LE)
872 goto irreducible;
873
874 if (ty == Ity_I32) {
875 addInstr(env, X86Instr_Alu32R(Xalu_MOV,
876 X86RMI_Mem(amode), dst) );
877 return dst;
878 }
879 if (ty == Ity_I16) {
880 addInstr(env, X86Instr_LoadEX(2,False,amode,dst));
881 return dst;
882 }
883 if (ty == Ity_I8) {
884 addInstr(env, X86Instr_LoadEX(1,False,amode,dst));
885 return dst;
886 }
887 break;
888 }
889
890 /* --------- TERNARY OP --------- */
891 case Iex_Triop: {
892 IRTriop *triop = e->Iex.Triop.details;
893 /* C3210 flags following FPU partial remainder (fprem), both
894 IEEE compliant (PREM1) and non-IEEE compliant (PREM). */
895 if (triop->op == Iop_PRemC3210F64
896 || triop->op == Iop_PRem1C3210F64) {
897 HReg junk = newVRegF(env);
898 HReg dst = newVRegI(env);
899 HReg srcL = iselDblExpr(env, triop->arg2);
900 HReg srcR = iselDblExpr(env, triop->arg3);
901 /* XXXROUNDINGFIXME */
902 /* set roundingmode here */
903 addInstr(env, X86Instr_FpBinary(
904 triop->op==Iop_PRemC3210F64
905 ? Xfp_PREM : Xfp_PREM1,
906 srcL,srcR,junk
907 ));
908 /* The previous pseudo-insn will have left the FPU's C3210
909 flags set correctly. So bag them. */
910 addInstr(env, X86Instr_FpStSW_AX());
911 addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), dst));
912 addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(0x4700), dst));
913 return dst;
914 }
915
916 break;
917 }
918
919 /* --------- BINARY OP --------- */
920 case Iex_Binop: {
921 X86AluOp aluOp;
922 X86ShiftOp shOp;
923
924 /* Pattern: Sub32(0,x) */
925 if (e->Iex.Binop.op == Iop_Sub32 && isZeroU32(e->Iex.Binop.arg1)) {
926 HReg dst = newVRegI(env);
927 HReg reg = iselIntExpr_R(env, e->Iex.Binop.arg2);
928 addInstr(env, mk_iMOVsd_RR(reg,dst));
929 addInstr(env, X86Instr_Unary32(Xun_NEG,dst));
930 return dst;
931 }
932
933 /* Is it an addition or logical style op? */
934 switch (e->Iex.Binop.op) {
935 case Iop_Add8: case Iop_Add16: case Iop_Add32:
936 aluOp = Xalu_ADD; break;
937 case Iop_Sub8: case Iop_Sub16: case Iop_Sub32:
938 aluOp = Xalu_SUB; break;
939 case Iop_And8: case Iop_And16: case Iop_And32:
940 aluOp = Xalu_AND; break;
941 case Iop_Or8: case Iop_Or16: case Iop_Or32:
942 aluOp = Xalu_OR; break;
943 case Iop_Xor8: case Iop_Xor16: case Iop_Xor32:
944 aluOp = Xalu_XOR; break;
945 case Iop_Mul16: case Iop_Mul32:
946 aluOp = Xalu_MUL; break;
947 default:
948 aluOp = Xalu_INVALID; break;
949 }
950 /* For commutative ops we assume any literal
951 values are on the second operand. */
952 if (aluOp != Xalu_INVALID) {
953 HReg dst = newVRegI(env);
954 HReg reg = iselIntExpr_R(env, e->Iex.Binop.arg1);
955 X86RMI* rmi = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
956 addInstr(env, mk_iMOVsd_RR(reg,dst));
957 addInstr(env, X86Instr_Alu32R(aluOp, rmi, dst));
958 return dst;
959 }
960 /* Could do better here; forcing the first arg into a reg
961 isn't always clever.
962 -- t70 = Xor32(And32(Xor32(LDle:I32(Add32(t41,0xFFFFFFA0:I32)),
963 LDle:I32(Add32(t41,0xFFFFFFA4:I32))),LDle:I32(Add32(
964 t41,0xFFFFFFA8:I32))),LDle:I32(Add32(t41,0xFFFFFFA0:I32)))
965 movl 0xFFFFFFA0(%vr41),%vr107
966 movl 0xFFFFFFA4(%vr41),%vr108
967 movl %vr107,%vr106
968 xorl %vr108,%vr106
969 movl 0xFFFFFFA8(%vr41),%vr109
970 movl %vr106,%vr105
971 andl %vr109,%vr105
972 movl 0xFFFFFFA0(%vr41),%vr110
973 movl %vr105,%vr104
974 xorl %vr110,%vr104
975 movl %vr104,%vr70
976 */
977
978 /* Perhaps a shift op? */
979 switch (e->Iex.Binop.op) {
980 case Iop_Shl32: case Iop_Shl16: case Iop_Shl8:
981 shOp = Xsh_SHL; break;
982 case Iop_Shr32: case Iop_Shr16: case Iop_Shr8:
983 shOp = Xsh_SHR; break;
984 case Iop_Sar32: case Iop_Sar16: case Iop_Sar8:
985 shOp = Xsh_SAR; break;
986 default:
987 shOp = Xsh_INVALID; break;
988 }
989 if (shOp != Xsh_INVALID) {
990 HReg dst = newVRegI(env);
991
992 /* regL = the value to be shifted */
993 HReg regL = iselIntExpr_R(env, e->Iex.Binop.arg1);
994 addInstr(env, mk_iMOVsd_RR(regL,dst));
995
996 /* Do any necessary widening for 16/8 bit operands */
997 switch (e->Iex.Binop.op) {
998 case Iop_Shr8:
999 addInstr(env, X86Instr_Alu32R(
1000 Xalu_AND, X86RMI_Imm(0xFF), dst));
1001 break;
1002 case Iop_Shr16:
1003 addInstr(env, X86Instr_Alu32R(
1004 Xalu_AND, X86RMI_Imm(0xFFFF), dst));
1005 break;
1006 case Iop_Sar8:
1007 addInstr(env, X86Instr_Sh32(Xsh_SHL, 24, dst));
1008 addInstr(env, X86Instr_Sh32(Xsh_SAR, 24, dst));
1009 break;
1010 case Iop_Sar16:
1011 addInstr(env, X86Instr_Sh32(Xsh_SHL, 16, dst));
1012 addInstr(env, X86Instr_Sh32(Xsh_SAR, 16, dst));
1013 break;
1014 default: break;
1015 }
1016
1017 /* Now consider the shift amount. If it's a literal, we
1018 can do a much better job than the general case. */
1019 if (e->Iex.Binop.arg2->tag == Iex_Const) {
1020 /* assert that the IR is well-typed */
1021 Int nshift;
1022 vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
1023 nshift = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
1024 vassert(nshift >= 0);
1025 if (nshift > 0)
1026 /* Can't allow nshift==0 since that means %cl */
1027 addInstr(env, X86Instr_Sh32( shOp, nshift, dst ));
1028 } else {
1029 /* General case; we have to force the amount into %cl. */
1030 HReg regR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1031 addInstr(env, mk_iMOVsd_RR(regR,hregX86_ECX()));
1032 addInstr(env, X86Instr_Sh32(shOp, 0/* %cl */, dst));
1033 }
1034 return dst;
1035 }
1036
1037 /* Handle misc other ops. */
1038
1039 if (e->Iex.Binop.op == Iop_Max32U) {
1040 HReg src1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
1041 HReg dst = newVRegI(env);
1042 HReg src2 = iselIntExpr_R(env, e->Iex.Binop.arg2);
1043 addInstr(env, mk_iMOVsd_RR(src1,dst));
1044 addInstr(env, X86Instr_Alu32R(Xalu_CMP, X86RMI_Reg(src2), dst));
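/* if dst is unsigned-below src2, overwrite it with src2, giving the unsigned max */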
1045 addInstr(env, X86Instr_CMov32(Xcc_B, X86RM_Reg(src2), dst));
1046 return dst;
1047 }
1048
1049 if (e->Iex.Binop.op == Iop_8HLto16) {
1050 HReg hi8 = newVRegI(env);
1051 HReg lo8 = newVRegI(env);
1052 HReg hi8s = iselIntExpr_R(env, e->Iex.Binop.arg1);
1053 HReg lo8s = iselIntExpr_R(env, e->Iex.Binop.arg2);
1054 addInstr(env, mk_iMOVsd_RR(hi8s, hi8));
1055 addInstr(env, mk_iMOVsd_RR(lo8s, lo8));
1056 addInstr(env, X86Instr_Sh32(Xsh_SHL, 8, hi8));
1057 addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(0xFF), lo8));
1058 addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Reg(lo8), hi8));
1059 return hi8;
1060 }
1061
1062 if (e->Iex.Binop.op == Iop_16HLto32) {
1063 HReg hi16 = newVRegI(env);
1064 HReg lo16 = newVRegI(env);
1065 HReg hi16s = iselIntExpr_R(env, e->Iex.Binop.arg1);
1066 HReg lo16s = iselIntExpr_R(env, e->Iex.Binop.arg2);
1067 addInstr(env, mk_iMOVsd_RR(hi16s, hi16));
1068 addInstr(env, mk_iMOVsd_RR(lo16s, lo16));
1069 addInstr(env, X86Instr_Sh32(Xsh_SHL, 16, hi16));
1070 addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(0xFFFF), lo16));
1071 addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Reg(lo16), hi16));
1072 return hi16;
1073 }
1074
1075 if (e->Iex.Binop.op == Iop_MullS16 || e->Iex.Binop.op == Iop_MullS8
1076 || e->Iex.Binop.op == Iop_MullU16 || e->Iex.Binop.op == Iop_MullU8) {
1077 HReg a16 = newVRegI(env);
1078 HReg b16 = newVRegI(env);
1079 HReg a16s = iselIntExpr_R(env, e->Iex.Binop.arg1);
1080 HReg b16s = iselIntExpr_R(env, e->Iex.Binop.arg2);
1081 Int shift = (e->Iex.Binop.op == Iop_MullS8
1082 || e->Iex.Binop.op == Iop_MullU8)
1083 ? 24 : 16;
1084 X86ShiftOp shr_op = (e->Iex.Binop.op == Iop_MullS8
1085 || e->Iex.Binop.op == Iop_MullS16)
1086 ? Xsh_SAR : Xsh_SHR;
1087
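/* Widen both 8/16-bit operands to 32 bits by shifting left and then
   right (arithmetically for signed, logically for unsigned); a plain
   32-bit multiply then yields the full widened product. */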
1088 addInstr(env, mk_iMOVsd_RR(a16s, a16));
1089 addInstr(env, mk_iMOVsd_RR(b16s, b16));
1090 addInstr(env, X86Instr_Sh32(Xsh_SHL, shift, a16));
1091 addInstr(env, X86Instr_Sh32(Xsh_SHL, shift, b16));
1092 addInstr(env, X86Instr_Sh32(shr_op, shift, a16));
1093 addInstr(env, X86Instr_Sh32(shr_op, shift, b16));
1094 addInstr(env, X86Instr_Alu32R(Xalu_MUL, X86RMI_Reg(a16), b16));
1095 return b16;
1096 }
1097
1098 if (e->Iex.Binop.op == Iop_CmpF64) {
1099 HReg fL = iselDblExpr(env, e->Iex.Binop.arg1);
1100 HReg fR = iselDblExpr(env, e->Iex.Binop.arg2);
1101 HReg dst = newVRegI(env);
1102 addInstr(env, X86Instr_FpCmp(fL,fR,dst));
1103 /* shift this right 8 bits so as to conform to CmpF64
1104 definition. */
1105 addInstr(env, X86Instr_Sh32(Xsh_SHR, 8, dst));
1106 return dst;
1107 }
1108
1109 if (e->Iex.Binop.op == Iop_F64toI32S
1110 || e->Iex.Binop.op == Iop_F64toI16S) {
1111 Int sz = e->Iex.Binop.op == Iop_F64toI16S ? 2 : 4;
1112 HReg rf = iselDblExpr(env, e->Iex.Binop.arg2);
1113 HReg dst = newVRegI(env);
1114
1115 /* Used several times ... */
1116 X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
1117
1118 /* rf now holds the value to be converted; the rounding mode is
1119 given by arg1, encoded as per the IRRoundingMode enum. The
1120 first thing to do is set the FPU's rounding mode
1121 accordingly. */
1122
1123 /* Create a space for the format conversion. */
1124 /* subl $4, %esp */
1125 sub_from_esp(env, 4);
1126
1127 /* Set host rounding mode */
1128 set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
1129
1130 /* gistw/l %rf, 0(%esp) */
1131 addInstr(env, X86Instr_FpLdStI(False/*store*/,
1132 toUChar(sz), rf, zero_esp));
1133
1134 if (sz == 2) {
1135 /* movzwl 0(%esp), %dst */
1136 addInstr(env, X86Instr_LoadEX(2,False,zero_esp,dst));
1137 } else {
1138 /* movl 0(%esp), %dst */
1139 vassert(sz == 4);
1140 addInstr(env, X86Instr_Alu32R(
1141 Xalu_MOV, X86RMI_Mem(zero_esp), dst));
1142 }
1143
1144 /* Restore default FPU rounding. */
1145 set_FPU_rounding_default( env );
1146
1147 /* addl $4, %esp */
1148 add_to_esp(env, 4);
1149 return dst;
1150 }
1151
1152 break;
1153 }
1154
1155 /* --------- UNARY OP --------- */
1156 case Iex_Unop: {
1157
1158 /* 1Uto8(32to1(expr32)) */
1159 if (e->Iex.Unop.op == Iop_1Uto8) {
1160 DECLARE_PATTERN(p_32to1_then_1Uto8);
1161 DEFINE_PATTERN(p_32to1_then_1Uto8,
1162 unop(Iop_1Uto8,unop(Iop_32to1,bind(0))));
1163 if (matchIRExpr(&mi,p_32to1_then_1Uto8,e)) {
1164 IRExpr* expr32 = mi.bindee[0];
1165 HReg dst = newVRegI(env);
1166 HReg src = iselIntExpr_R(env, expr32);
1167 addInstr(env, mk_iMOVsd_RR(src,dst) );
1168 addInstr(env, X86Instr_Alu32R(Xalu_AND,
1169 X86RMI_Imm(1), dst));
1170 return dst;
1171 }
1172 }
1173
1174 /* 8Uto32(LDle(expr32)) */
1175 if (e->Iex.Unop.op == Iop_8Uto32) {
1176 DECLARE_PATTERN(p_LDle8_then_8Uto32);
1177 DEFINE_PATTERN(p_LDle8_then_8Uto32,
1178 unop(Iop_8Uto32,
1179 IRExpr_Load(Iend_LE,Ity_I8,bind(0))) );
1180 if (matchIRExpr(&mi,p_LDle8_then_8Uto32,e)) {
1181 HReg dst = newVRegI(env);
1182 X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
1183 addInstr(env, X86Instr_LoadEX(1,False,amode,dst));
1184 return dst;
1185 }
1186 }
1187
1188 /* 8Sto32(LDle(expr32)) */
1189 if (e->Iex.Unop.op == Iop_8Sto32) {
1190 DECLARE_PATTERN(p_LDle8_then_8Sto32);
1191 DEFINE_PATTERN(p_LDle8_then_8Sto32,
1192 unop(Iop_8Sto32,
1193 IRExpr_Load(Iend_LE,Ity_I8,bind(0))) );
1194 if (matchIRExpr(&mi,p_LDle8_then_8Sto32,e)) {
1195 HReg dst = newVRegI(env);
1196 X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
1197 addInstr(env, X86Instr_LoadEX(1,True,amode,dst));
1198 return dst;
1199 }
1200 }
1201
1202 /* 16Uto32(LDle(expr32)) */
1203 if (e->Iex.Unop.op == Iop_16Uto32) {
1204 DECLARE_PATTERN(p_LDle16_then_16Uto32);
1205 DEFINE_PATTERN(p_LDle16_then_16Uto32,
1206 unop(Iop_16Uto32,
1207 IRExpr_Load(Iend_LE,Ity_I16,bind(0))) );
1208 if (matchIRExpr(&mi,p_LDle16_then_16Uto32,e)) {
1209 HReg dst = newVRegI(env);
1210 X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
1211 addInstr(env, X86Instr_LoadEX(2,False,amode,dst));
1212 return dst;
1213 }
1214 }
1215
1216 /* 8Uto32(GET:I8) */
1217 if (e->Iex.Unop.op == Iop_8Uto32) {
1218 if (e->Iex.Unop.arg->tag == Iex_Get) {
1219 HReg dst;
1220 X86AMode* amode;
1221 vassert(e->Iex.Unop.arg->Iex.Get.ty == Ity_I8);
1222 dst = newVRegI(env);
1223 amode = X86AMode_IR(e->Iex.Unop.arg->Iex.Get.offset,
1224 hregX86_EBP());
1225 addInstr(env, X86Instr_LoadEX(1,False,amode,dst));
1226 return dst;
1227 }
1228 }
1229
1230 /* 16Uto32(GET:I16) */
1231 if (e->Iex.Unop.op == Iop_16Uto32) {
1232 if (e->Iex.Unop.arg->tag == Iex_Get) {
1233 HReg dst;
1234 X86AMode* amode;
1235 vassert(e->Iex.Unop.arg->Iex.Get.ty == Ity_I16);
1236 dst = newVRegI(env);
1237 amode = X86AMode_IR(e->Iex.Unop.arg->Iex.Get.offset,
1238 hregX86_EBP());
1239 addInstr(env, X86Instr_LoadEX(2,False,amode,dst));
1240 return dst;
1241 }
1242 }
1243
1244 switch (e->Iex.Unop.op) {
1245 case Iop_8Uto16:
1246 case Iop_8Uto32:
1247 case Iop_16Uto32: {
1248 HReg dst = newVRegI(env);
1249 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1250 UInt mask = e->Iex.Unop.op==Iop_16Uto32 ? 0xFFFF : 0xFF;
1251 addInstr(env, mk_iMOVsd_RR(src,dst) );
1252 addInstr(env, X86Instr_Alu32R(Xalu_AND,
1253 X86RMI_Imm(mask), dst));
1254 return dst;
1255 }
1256 case Iop_8Sto16:
1257 case Iop_8Sto32:
1258 case Iop_16Sto32: {
1259 HReg dst = newVRegI(env);
1260 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1261 UInt amt = e->Iex.Unop.op==Iop_16Sto32 ? 16 : 24;
1262 addInstr(env, mk_iMOVsd_RR(src,dst) );
1263 addInstr(env, X86Instr_Sh32(Xsh_SHL, amt, dst));
1264 addInstr(env, X86Instr_Sh32(Xsh_SAR, amt, dst));
1265 return dst;
1266 }
1267 case Iop_Not8:
1268 case Iop_Not16:
1269 case Iop_Not32: {
1270 HReg dst = newVRegI(env);
1271 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1272 addInstr(env, mk_iMOVsd_RR(src,dst) );
1273 addInstr(env, X86Instr_Unary32(Xun_NOT,dst));
1274 return dst;
1275 }
1276 case Iop_64HIto32: {
1277 HReg rHi, rLo;
1278 iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
1279 return rHi; /* and abandon rLo .. poor wee thing :-) */
1280 }
1281 case Iop_64to32: {
1282 HReg rHi, rLo;
1283 iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
1284 return rLo; /* similar stupid comment to the above ... */
1285 }
1286 case Iop_16HIto8:
1287 case Iop_32HIto16: {
1288 HReg dst = newVRegI(env);
1289 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1290 Int shift = e->Iex.Unop.op == Iop_16HIto8 ? 8 : 16;
1291 addInstr(env, mk_iMOVsd_RR(src,dst) );
1292 addInstr(env, X86Instr_Sh32(Xsh_SHR, shift, dst));
1293 return dst;
1294 }
1295 case Iop_1Uto32:
1296 case Iop_1Uto8: {
1297 HReg dst = newVRegI(env);
1298 X86CondCode cond = iselCondCode(env, e->Iex.Unop.arg);
1299 addInstr(env, X86Instr_Set32(cond,dst));
1300 return dst;
1301 }
1302 case Iop_1Sto8:
1303 case Iop_1Sto16:
1304 case Iop_1Sto32: {
1305 /* could do better than this, but for now ... */
1306 HReg dst = newVRegI(env);
1307 X86CondCode cond = iselCondCode(env, e->Iex.Unop.arg);
1308 addInstr(env, X86Instr_Set32(cond,dst));
1309 addInstr(env, X86Instr_Sh32(Xsh_SHL, 31, dst));
1310 addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, dst));
1311 return dst;
1312 }
1313 case Iop_Ctz32: {
1314 /* Count trailing zeroes, implemented by x86 'bsfl' */
1315 HReg dst = newVRegI(env);
1316 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1317 addInstr(env, X86Instr_Bsfr32(True,src,dst));
1318 return dst;
1319 }
1320 case Iop_Clz32: {
1321 /* Count leading zeroes. Do 'bsrl' to establish the index
1322 of the highest set bit, and subtract that value from
1323 31. */
1324 HReg tmp = newVRegI(env);
1325 HReg dst = newVRegI(env);
1326 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1327 addInstr(env, X86Instr_Bsfr32(False,src,tmp));
1328 addInstr(env, X86Instr_Alu32R(Xalu_MOV,
1329 X86RMI_Imm(31), dst));
1330 addInstr(env, X86Instr_Alu32R(Xalu_SUB,
1331 X86RMI_Reg(tmp), dst));
1332 return dst;
1333 }
1334
1335 case Iop_CmpwNEZ32: {
1336 HReg dst = newVRegI(env);
1337 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
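/* Compute dst = (src | -src) >>signed 31: all zeroes if src == 0,
   all ones otherwise. */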
1338 addInstr(env, mk_iMOVsd_RR(src,dst));
1339 addInstr(env, X86Instr_Unary32(Xun_NEG,dst));
1340 addInstr(env, X86Instr_Alu32R(Xalu_OR,
1341 X86RMI_Reg(src), dst));
1342 addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, dst));
1343 return dst;
1344 }
1345 case Iop_Left8:
1346 case Iop_Left16:
1347 case Iop_Left32: {
1348 HReg dst = newVRegI(env);
1349 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
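/* LeftN(x) is x | -x: negate a copy and OR the original back in. */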
1350 addInstr(env, mk_iMOVsd_RR(src, dst));
1351 addInstr(env, X86Instr_Unary32(Xun_NEG, dst));
1352 addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Reg(src), dst));
1353 return dst;
1354 }
1355
1356 case Iop_V128to32: {
1357 HReg dst = newVRegI(env);
1358 HReg vec = iselVecExpr(env, e->Iex.Unop.arg);
1359 X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
1360 sub_from_esp(env, 16);
1361 addInstr(env, X86Instr_SseLdSt(False/*store*/, vec, esp0));
1362 addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(esp0), dst ));
1363 add_to_esp(env, 16);
1364 return dst;
1365 }
1366
1367 /* ReinterpF32asI32(e) */
1368 /* Given an IEEE754 single, produce an I32 with the same bit
1369 pattern. Keep stack 8-aligned even though only using 4
1370 bytes. */
1371 case Iop_ReinterpF32asI32: {
1372 HReg rf = iselFltExpr(env, e->Iex.Unop.arg);
1373 HReg dst = newVRegI(env);
1374 X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
1375 /* paranoia */
1376 set_FPU_rounding_default(env);
1377 /* subl $8, %esp */
1378 sub_from_esp(env, 8);
1379 /* gstF %rf, 0(%esp) */
1380 addInstr(env,
1381 X86Instr_FpLdSt(False/*store*/, 4, rf, zero_esp));
1382 /* movl 0(%esp), %dst */
1383 addInstr(env,
1384 X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(zero_esp), dst));
1385 /* addl $8, %esp */
1386 add_to_esp(env, 8);
1387 return dst;
1388 }
1389
1390 case Iop_16to8:
1391 case Iop_32to8:
1392 case Iop_32to16:
1393 /* These are no-ops. */
1394 return iselIntExpr_R(env, e->Iex.Unop.arg);
1395
1396 case Iop_GetMSBs8x8: {
1397 /* Note: the following assumes the helper is of
1398 signature
1399 UInt fn ( ULong ), and is not a regparm fn.
1400 */
1401 HReg xLo, xHi;
1402 HReg dst = newVRegI(env);
1403 Addr fn = (Addr)h_generic_calc_GetMSBs8x8;
1404 iselInt64Expr(&xHi, &xLo, env, e->Iex.Unop.arg);
1405 addInstr(env, X86Instr_Push(X86RMI_Reg(xHi)));
1406 addInstr(env, X86Instr_Push(X86RMI_Reg(xLo)));
1407 addInstr(env, X86Instr_Call( Xcc_ALWAYS, (Addr32)fn,
1408 0, mk_RetLoc_simple(RLPri_Int) ));
1409 add_to_esp(env, 2*4);
1410 addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), dst));
1411 return dst;
1412 }
1413
1414 default:
1415 break;
1416 }
1417 break;
1418 }
1419
1420 /* --------- GET --------- */
1421 case Iex_Get: {
1422 if (ty == Ity_I32) {
1423 HReg dst = newVRegI(env);
1424 addInstr(env, X86Instr_Alu32R(
1425 Xalu_MOV,
1426 X86RMI_Mem(X86AMode_IR(e->Iex.Get.offset,
1427 hregX86_EBP())),
1428 dst));
1429 return dst;
1430 }
1431 if (ty == Ity_I8 || ty == Ity_I16) {
1432 HReg dst = newVRegI(env);
1433 addInstr(env, X86Instr_LoadEX(
1434 toUChar(ty==Ity_I8 ? 1 : 2),
1435 False,
1436 X86AMode_IR(e->Iex.Get.offset,hregX86_EBP()),
1437 dst));
1438 return dst;
1439 }
1440 break;
1441 }
1442
1443 case Iex_GetI: {
1444 X86AMode* am
1445 = genGuestArrayOffset(
1446 env, e->Iex.GetI.descr,
1447 e->Iex.GetI.ix, e->Iex.GetI.bias );
1448 HReg dst = newVRegI(env);
1449 if (ty == Ity_I8) {
1450 addInstr(env, X86Instr_LoadEX( 1, False, am, dst ));
1451 return dst;
1452 }
1453 if (ty == Ity_I32) {
1454 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(am), dst));
1455 return dst;
1456 }
1457 break;
1458 }
1459
1460 /* --------- CCALL --------- */
1461 case Iex_CCall: {
1462 HReg dst = newVRegI(env);
1463 vassert(ty == e->Iex.CCall.retty);
1464
1465 /* be very restrictive for now. Only 32/64-bit ints allowed for
1466 args, and 32 bits for return type. Don't forget to change
1467 the RetLoc if more return types are allowed in future. */
1468 if (e->Iex.CCall.retty != Ity_I32)
1469 goto irreducible;
1470
1471 /* Marshal args, do the call, clear stack. */
1472 UInt addToSp = 0;
1473 RetLoc rloc = mk_RetLoc_INVALID();
1474 doHelperCall( &addToSp, &rloc, env, NULL/*guard*/,
1475 e->Iex.CCall.cee, e->Iex.CCall.retty, e->Iex.CCall.args );
1476 vassert(is_sane_RetLoc(rloc));
1477 vassert(rloc.pri == RLPri_Int);
1478 vassert(addToSp == 0);
1479
1480 addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), dst));
1481 return dst;
1482 }
1483
1484 /* --------- LITERAL --------- */
1485 /* 32/16/8-bit literals */
1486 case Iex_Const: {
1487 X86RMI* rmi = iselIntExpr_RMI ( env, e );
1488 HReg r = newVRegI(env);
1489 addInstr(env, X86Instr_Alu32R(Xalu_MOV, rmi, r));
1490 return r;
1491 }
1492
1493 /* --------- MULTIPLEX --------- */
1494 case Iex_ITE: { // VFD
1495 if ((ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8)
1496 && typeOfIRExpr(env->type_env,e->Iex.ITE.cond) == Ity_I1) {
1497 HReg r1 = iselIntExpr_R(env, e->Iex.ITE.iftrue);
1498 X86RM* r0 = iselIntExpr_RM(env, e->Iex.ITE.iffalse);
1499 HReg dst = newVRegI(env);
1500 addInstr(env, mk_iMOVsd_RR(r1,dst));
1501 X86CondCode cc = iselCondCode(env, e->Iex.ITE.cond);
1502 addInstr(env, X86Instr_CMov32(cc ^ 1, r0, dst));
1503 return dst;
1504 }
1505 break;
1506 }
1507
1508 default:
1509 break;
1510 } /* switch (e->tag) */
1511
1512 /* We get here if no pattern matched. */
1513 irreducible:
1514 ppIRExpr(e);
1515 vpanic("iselIntExpr_R: cannot reduce tree");
1516 }
1517
1518
1519 /*---------------------------------------------------------*/
1520 /*--- ISEL: Integer expression auxiliaries ---*/
1521 /*---------------------------------------------------------*/
1522
1523 /* --------------------- AMODEs --------------------- */
1524
1525 /* Return an AMode which computes the value of the specified
1526 expression, possibly also adding insns to the code list as a
1527 result. The expression may only be a 32-bit one.
1528 */
1529
1530 static Bool sane_AMode ( X86AMode* am )
1531 {
1532 switch (am->tag) {
1533 case Xam_IR:
1534 return
1535 toBool( hregClass(am->Xam.IR.reg) == HRcInt32
1536 && (hregIsVirtual(am->Xam.IR.reg)
1537 || sameHReg(am->Xam.IR.reg, hregX86_EBP())) );
1538 case Xam_IRRS:
1539 return
1540 toBool( hregClass(am->Xam.IRRS.base) == HRcInt32
1541 && hregIsVirtual(am->Xam.IRRS.base)
1542 && hregClass(am->Xam.IRRS.index) == HRcInt32
1543 && hregIsVirtual(am->Xam.IRRS.index) );
1544 default:
1545 vpanic("sane_AMode: unknown x86 amode tag");
1546 }
1547 }
1548
1549 static X86AMode* iselIntExpr_AMode ( ISelEnv* env, IRExpr* e )
1550 {
1551 X86AMode* am = iselIntExpr_AMode_wrk(env, e);
1552 vassert(sane_AMode(am));
1553 return am;
1554 }
1555
1556 /* DO NOT CALL THIS DIRECTLY ! */
1557 static X86AMode* iselIntExpr_AMode_wrk ( ISelEnv* env, IRExpr* e )
1558 {
1559 IRType ty = typeOfIRExpr(env->type_env,e);
1560 vassert(ty == Ity_I32);
1561
1562 /* Add32( Add32(expr1, Shl32(expr2, simm)), imm32 ) */
1563 if (e->tag == Iex_Binop
1564 && e->Iex.Binop.op == Iop_Add32
1565 && e->Iex.Binop.arg2->tag == Iex_Const
1566 && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U32
1567 && e->Iex.Binop.arg1->tag == Iex_Binop
1568 && e->Iex.Binop.arg1->Iex.Binop.op == Iop_Add32
1569 && e->Iex.Binop.arg1->Iex.Binop.arg2->tag == Iex_Binop
1570 && e->Iex.Binop.arg1->Iex.Binop.arg2->Iex.Binop.op == Iop_Shl32
1571 && e->Iex.Binop.arg1
1572 ->Iex.Binop.arg2->Iex.Binop.arg2->tag == Iex_Const
1573 && e->Iex.Binop.arg1
1574 ->Iex.Binop.arg2->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8) {
1575 UInt shift = e->Iex.Binop.arg1
1576 ->Iex.Binop.arg2->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
1577 UInt imm32 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
1578 if (shift == 1 || shift == 2 || shift == 3) {
1579 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1->Iex.Binop.arg1);
1580 HReg r2 = iselIntExpr_R(env, e->Iex.Binop.arg1
1581 ->Iex.Binop.arg2->Iex.Binop.arg1 );
1582 return X86AMode_IRRS(imm32, r1, r2, shift);
1583 }
1584 }
1585
1586 /* Add32(expr1, Shl32(expr2, imm)) */
1587 if (e->tag == Iex_Binop
1588 && e->Iex.Binop.op == Iop_Add32
1589 && e->Iex.Binop.arg2->tag == Iex_Binop
1590 && e->Iex.Binop.arg2->Iex.Binop.op == Iop_Shl32
1591 && e->Iex.Binop.arg2->Iex.Binop.arg2->tag == Iex_Const
1592 && e->Iex.Binop.arg2->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8) {
1593 UInt shift = e->Iex.Binop.arg2->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
1594 if (shift == 1 || shift == 2 || shift == 3) {
1595 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
1596 HReg r2 = iselIntExpr_R(env, e->Iex.Binop.arg2->Iex.Binop.arg1 );
1597 return X86AMode_IRRS(0, r1, r2, shift);
1598 }
1599 }
1600
1601 /* Add32(expr,i) */
1602 if (e->tag == Iex_Binop
1603 && e->Iex.Binop.op == Iop_Add32
1604 && e->Iex.Binop.arg2->tag == Iex_Const
1605 && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U32) {
1606 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
1607 return X86AMode_IR(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32, r1);
1608 }
1609
1610 /* Doesn't match anything in particular. Generate it into
1611 a register and use that. */
1612 {
1613 HReg r1 = iselIntExpr_R(env, e);
1614 return X86AMode_IR(0, r1);
1615 }
1616 }
1617
1618
1619 /* --------------------- RMIs --------------------- */
1620
1621 /* Similarly, calculate an expression into an X86RMI operand. As with
1622 iselIntExpr_R, the expression can have type 32, 16 or 8 bits. */
1623
1624 static X86RMI* iselIntExpr_RMI ( ISelEnv* env, IRExpr* e )
1625 {
1626 X86RMI* rmi = iselIntExpr_RMI_wrk(env, e);
1627 /* sanity checks ... */
1628 switch (rmi->tag) {
1629 case Xrmi_Imm:
1630 return rmi;
1631 case Xrmi_Reg:
1632 vassert(hregClass(rmi->Xrmi.Reg.reg) == HRcInt32);
1633 vassert(hregIsVirtual(rmi->Xrmi.Reg.reg));
1634 return rmi;
1635 case Xrmi_Mem:
1636 vassert(sane_AMode(rmi->Xrmi.Mem.am));
1637 return rmi;
1638 default:
1639 vpanic("iselIntExpr_RMI: unknown x86 RMI tag");
1640 }
1641 }
1642
1643 /* DO NOT CALL THIS DIRECTLY ! */
1644 static X86RMI* iselIntExpr_RMI_wrk ( ISelEnv* env, IRExpr* e )
1645 {
1646 IRType ty = typeOfIRExpr(env->type_env,e);
1647 vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
1648
1649 /* special case: immediate */
1650 if (e->tag == Iex_Const) {
1651 UInt u;
1652 switch (e->Iex.Const.con->tag) {
1653 case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
1654 case Ico_U16: u = 0xFFFF & (e->Iex.Const.con->Ico.U16); break;
1655 case Ico_U8: u = 0xFF & (e->Iex.Const.con->Ico.U8); break;
1656 default: vpanic("iselIntExpr_RMI.Iex_Const(x86h)");
1657 }
1658 return X86RMI_Imm(u);
1659 }
1660
1661 /* special case: 32-bit GET */
1662 if (e->tag == Iex_Get && ty == Ity_I32) {
1663 return X86RMI_Mem(X86AMode_IR(e->Iex.Get.offset,
1664 hregX86_EBP()));
1665 }
1666
1667 /* special case: 32-bit load from memory */
1668 if (e->tag == Iex_Load && ty == Ity_I32
1669 && e->Iex.Load.end == Iend_LE) {
1670 X86AMode* am = iselIntExpr_AMode(env, e->Iex.Load.addr);
1671 return X86RMI_Mem(am);
1672 }
1673
1674 /* default case: calculate into a register and return that */
1675 {
1676 HReg r = iselIntExpr_R ( env, e );
1677 return X86RMI_Reg(r);
1678 }
1679 }
1680
1681
1682 /* --------------------- RIs --------------------- */
1683
1684 /* Calculate an expression into an X86RI operand. As with
1685 iselIntExpr_R, the expression can have type 32, 16 or 8 bits. */
1686
1687 static X86RI* iselIntExpr_RI ( ISelEnv* env, IRExpr* e )
1688 {
1689 X86RI* ri = iselIntExpr_RI_wrk(env, e);
1690 /* sanity checks ... */
1691 switch (ri->tag) {
1692 case Xri_Imm:
1693 return ri;
1694 case Xri_Reg:
1695 vassert(hregClass(ri->Xri.Reg.reg) == HRcInt32);
1696 vassert(hregIsVirtual(ri->Xri.Reg.reg));
1697 return ri;
1698 default:
1699 vpanic("iselIntExpr_RI: unknown x86 RI tag");
1700 }
1701 }
1702
1703 /* DO NOT CALL THIS DIRECTLY ! */
1704 static X86RI* iselIntExpr_RI_wrk ( ISelEnv* env, IRExpr* e )
1705 {
1706 IRType ty = typeOfIRExpr(env->type_env,e);
1707 vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
1708
1709 /* special case: immediate */
1710 if (e->tag == Iex_Const) {
1711 UInt u;
1712 switch (e->Iex.Const.con->tag) {
1713 case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
1714 case Ico_U16: u = 0xFFFF & (e->Iex.Const.con->Ico.U16); break;
1715 case Ico_U8: u = 0xFF & (e->Iex.Const.con->Ico.U8); break;
1716 default: vpanic("iselIntExpr_RMI.Iex_Const(x86h)");
1717 }
1718 return X86RI_Imm(u);
1719 }
1720
1721 /* default case: calculate into a register and return that */
1722 {
1723 HReg r = iselIntExpr_R ( env, e );
1724 return X86RI_Reg(r);
1725 }
1726 }
1727
1728
1729 /* --------------------- RMs --------------------- */
1730
1731 /* Similarly, calculate an expression into an X86RM operand. As with
1732 iselIntExpr_R, the expression can have type 32, 16 or 8 bits. */
1733
1734 static X86RM* iselIntExpr_RM ( ISelEnv* env, IRExpr* e )
1735 {
1736 X86RM* rm = iselIntExpr_RM_wrk(env, e);
1737 /* sanity checks ... */
1738 switch (rm->tag) {
1739 case Xrm_Reg:
1740 vassert(hregClass(rm->Xrm.Reg.reg) == HRcInt32);
1741 vassert(hregIsVirtual(rm->Xrm.Reg.reg));
1742 return rm;
1743 case Xrm_Mem:
1744 vassert(sane_AMode(rm->Xrm.Mem.am));
1745 return rm;
1746 default:
1747 vpanic("iselIntExpr_RM: unknown x86 RM tag");
1748 }
1749 }
1750
1751 /* DO NOT CALL THIS DIRECTLY ! */
1752 static X86RM* iselIntExpr_RM_wrk ( ISelEnv* env, IRExpr* e )
1753 {
1754 IRType ty = typeOfIRExpr(env->type_env,e);
1755 vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
1756
1757 /* special case: 32-bit GET */
1758 if (e->tag == Iex_Get && ty == Ity_I32) {
1759 return X86RM_Mem(X86AMode_IR(e->Iex.Get.offset,
1760 hregX86_EBP()));
1761 }
1762
1763    /* special case: load from memory (not handled; falls through to the default case below) */
1764
1765 /* default case: calculate into a register and return that */
1766 {
1767 HReg r = iselIntExpr_R ( env, e );
1768 return X86RM_Reg(r);
1769 }
1770 }
1771
1772
1773 /* --------------------- CONDCODE --------------------- */
1774
1775 /* Generate code to evaluate a bit-typed expression, returning the
1776    condition code which would correspond to the expression
1777    notionally having returned 1. */
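/* Callers typically feed the returned condition code straight into a
   conditional instruction, e.g. X86Instr_Set32(cc, r) to materialise
   the bit in a register, or X86Instr_CMov32(cc, ..) as in the ITE
   cases below. */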
1778
1779 static X86CondCode iselCondCode ( ISelEnv* env, IRExpr* e )
1780 {
1781 /* Uh, there's nothing we can sanity check here, unfortunately. */
1782 return iselCondCode_wrk(env,e);
1783 }
1784
1785 /* DO NOT CALL THIS DIRECTLY ! */
1786 static X86CondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e )
1787 {
1788 MatchInfo mi;
1789
1790 vassert(e);
1791 vassert(typeOfIRExpr(env->type_env,e) == Ity_I1);
1792
1793 /* var */
1794 if (e->tag == Iex_RdTmp) {
1795 HReg r32 = lookupIRTemp(env, e->Iex.RdTmp.tmp);
1796 /* Test32 doesn't modify r32; so this is OK. */
1797 addInstr(env, X86Instr_Test32(1,X86RM_Reg(r32)));
1798 return Xcc_NZ;
1799 }
1800
1801 /* Constant 1:Bit */
1802 if (e->tag == Iex_Const) {
1803 HReg r;
1804 vassert(e->Iex.Const.con->tag == Ico_U1);
1805 vassert(e->Iex.Const.con->Ico.U1 == True
1806 || e->Iex.Const.con->Ico.U1 == False);
1807 r = newVRegI(env);
1808 addInstr(env, X86Instr_Alu32R(Xalu_MOV,X86RMI_Imm(0),r));
1809 addInstr(env, X86Instr_Alu32R(Xalu_XOR,X86RMI_Reg(r),r));
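      /* The MOV/XOR sequence leaves r == 0 and, crucially, sets ZF.
         Returning Xcc_Z therefore makes the condition always hold
         (constant True), and Xcc_NZ makes it never hold (constant
         False). */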
1810 return e->Iex.Const.con->Ico.U1 ? Xcc_Z : Xcc_NZ;
1811 }
1812
1813 /* Not1(e) */
1814 if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_Not1) {
1815 /* Generate code for the arg, and negate the test condition */
1816 return 1 ^ iselCondCode(env, e->Iex.Unop.arg);
1817 }
1818
1819 /* --- patterns rooted at: 32to1 --- */
1820
1821 if (e->tag == Iex_Unop
1822 && e->Iex.Unop.op == Iop_32to1) {
1823 X86RM* rm = iselIntExpr_RM(env, e->Iex.Unop.arg);
1824 addInstr(env, X86Instr_Test32(1,rm));
1825 return Xcc_NZ;
1826 }
1827
1828 /* --- patterns rooted at: CmpNEZ8 --- */
1829
1830 /* CmpNEZ8(x) */
1831 if (e->tag == Iex_Unop
1832 && e->Iex.Unop.op == Iop_CmpNEZ8) {
1833 X86RM* rm = iselIntExpr_RM(env, e->Iex.Unop.arg);
1834 addInstr(env, X86Instr_Test32(0xFF,rm));
1835 return Xcc_NZ;
1836 }
1837
1838 /* --- patterns rooted at: CmpNEZ16 --- */
1839
1840 /* CmpNEZ16(x) */
1841 if (e->tag == Iex_Unop
1842 && e->Iex.Unop.op == Iop_CmpNEZ16) {
1843 X86RM* rm = iselIntExpr_RM(env, e->Iex.Unop.arg);
1844 addInstr(env, X86Instr_Test32(0xFFFF,rm));
1845 return Xcc_NZ;
1846 }
1847
1848 /* --- patterns rooted at: CmpNEZ32 --- */
1849
1850 /* CmpNEZ32(And32(x,y)) */
1851 {
1852 DECLARE_PATTERN(p_CmpNEZ32_And32);
1853 DEFINE_PATTERN(p_CmpNEZ32_And32,
1854 unop(Iop_CmpNEZ32, binop(Iop_And32, bind(0), bind(1))));
1855 if (matchIRExpr(&mi, p_CmpNEZ32_And32, e)) {
1856 HReg r0 = iselIntExpr_R(env, mi.bindee[0]);
1857 X86RMI* rmi1 = iselIntExpr_RMI(env, mi.bindee[1]);
1858 HReg tmp = newVRegI(env);
1859 addInstr(env, mk_iMOVsd_RR(r0, tmp));
1860 addInstr(env, X86Instr_Alu32R(Xalu_AND,rmi1,tmp));
1861 return Xcc_NZ;
1862 }
1863 }
1864
1865 /* CmpNEZ32(Or32(x,y)) */
1866 {
1867 DECLARE_PATTERN(p_CmpNEZ32_Or32);
1868 DEFINE_PATTERN(p_CmpNEZ32_Or32,
1869 unop(Iop_CmpNEZ32, binop(Iop_Or32, bind(0), bind(1))));
1870 if (matchIRExpr(&mi, p_CmpNEZ32_Or32, e)) {
1871 HReg r0 = iselIntExpr_R(env, mi.bindee[0]);
1872 X86RMI* rmi1 = iselIntExpr_RMI(env, mi.bindee[1]);
1873 HReg tmp = newVRegI(env);
1874 addInstr(env, mk_iMOVsd_RR(r0, tmp));
1875 addInstr(env, X86Instr_Alu32R(Xalu_OR,rmi1,tmp));
1876 return Xcc_NZ;
1877 }
1878 }
1879
1880 /* CmpNEZ32(GET(..):I32) */
1881 if (e->tag == Iex_Unop
1882 && e->Iex.Unop.op == Iop_CmpNEZ32
1883 && e->Iex.Unop.arg->tag == Iex_Get) {
1884 X86AMode* am = X86AMode_IR(e->Iex.Unop.arg->Iex.Get.offset,
1885 hregX86_EBP());
1886 addInstr(env, X86Instr_Alu32M(Xalu_CMP, X86RI_Imm(0), am));
1887 return Xcc_NZ;
1888 }
1889
1890 /* CmpNEZ32(x) */
1891 if (e->tag == Iex_Unop
1892 && e->Iex.Unop.op == Iop_CmpNEZ32) {
1893 HReg r1 = iselIntExpr_R(env, e->Iex.Unop.arg);
1894 X86RMI* rmi2 = X86RMI_Imm(0);
1895 addInstr(env, X86Instr_Alu32R(Xalu_CMP,rmi2,r1));
1896 return Xcc_NZ;
1897 }
1898
1899 /* --- patterns rooted at: CmpNEZ64 --- */
1900
1901 /* CmpNEZ64(Or64(x,y)) */
1902 {
1903 DECLARE_PATTERN(p_CmpNEZ64_Or64);
1904 DEFINE_PATTERN(p_CmpNEZ64_Or64,
1905 unop(Iop_CmpNEZ64, binop(Iop_Or64, bind(0), bind(1))));
1906 if (matchIRExpr(&mi, p_CmpNEZ64_Or64, e)) {
1907 HReg hi1, lo1, hi2, lo2;
1908 HReg tmp = newVRegI(env);
1909 iselInt64Expr( &hi1, &lo1, env, mi.bindee[0] );
1910 addInstr(env, mk_iMOVsd_RR(hi1, tmp));
1911 addInstr(env, X86Instr_Alu32R(Xalu_OR,X86RMI_Reg(lo1),tmp));
1912 iselInt64Expr( &hi2, &lo2, env, mi.bindee[1] );
1913 addInstr(env, X86Instr_Alu32R(Xalu_OR,X86RMI_Reg(hi2),tmp));
1914 addInstr(env, X86Instr_Alu32R(Xalu_OR,X86RMI_Reg(lo2),tmp));
1915 return Xcc_NZ;
1916 }
1917 }
1918
1919 /* CmpNEZ64(x) */
1920 if (e->tag == Iex_Unop
1921 && e->Iex.Unop.op == Iop_CmpNEZ64) {
1922 HReg hi, lo;
1923 HReg tmp = newVRegI(env);
1924 iselInt64Expr( &hi, &lo, env, e->Iex.Unop.arg );
1925 addInstr(env, mk_iMOVsd_RR(hi, tmp));
1926 addInstr(env, X86Instr_Alu32R(Xalu_OR,X86RMI_Reg(lo), tmp));
1927 return Xcc_NZ;
1928 }
1929
1930 /* --- patterns rooted at: Cmp{EQ,NE}{8,16} --- */
1931
1932 /* CmpEQ8 / CmpNE8 */
1933 if (e->tag == Iex_Binop
1934 && (e->Iex.Binop.op == Iop_CmpEQ8
1935 || e->Iex.Binop.op == Iop_CmpNE8
1936 || e->Iex.Binop.op == Iop_CasCmpEQ8
1937 || e->Iex.Binop.op == Iop_CasCmpNE8)) {
1938 if (isZeroU8(e->Iex.Binop.arg2)) {
1939 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
1940 addInstr(env, X86Instr_Test32(0xFF,X86RM_Reg(r1)));
1941 switch (e->Iex.Binop.op) {
1942 case Iop_CmpEQ8: case Iop_CasCmpEQ8: return Xcc_Z;
1943 case Iop_CmpNE8: case Iop_CasCmpNE8: return Xcc_NZ;
1944 default: vpanic("iselCondCode(x86): CmpXX8(expr,0:I8)");
1945 }
1946 } else {
1947 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
1948 X86RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
1949 HReg r = newVRegI(env);
1950 addInstr(env, mk_iMOVsd_RR(r1,r));
1951 addInstr(env, X86Instr_Alu32R(Xalu_XOR,rmi2,r));
1952 addInstr(env, X86Instr_Test32(0xFF,X86RM_Reg(r)));
1953 switch (e->Iex.Binop.op) {
1954 case Iop_CmpEQ8: case Iop_CasCmpEQ8: return Xcc_Z;
1955 case Iop_CmpNE8: case Iop_CasCmpNE8: return Xcc_NZ;
1956 default: vpanic("iselCondCode(x86): CmpXX8(expr,expr)");
1957 }
1958 }
1959 }
1960
1961 /* CmpEQ16 / CmpNE16 */
1962 if (e->tag == Iex_Binop
1963 && (e->Iex.Binop.op == Iop_CmpEQ16
1964 || e->Iex.Binop.op == Iop_CmpNE16
1965 || e->Iex.Binop.op == Iop_CasCmpEQ16
1966 || e->Iex.Binop.op == Iop_CasCmpNE16
1967 || e->Iex.Binop.op == Iop_ExpCmpNE16)) {
1968 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
1969 X86RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
1970 HReg r = newVRegI(env);
1971 addInstr(env, mk_iMOVsd_RR(r1,r));
1972 addInstr(env, X86Instr_Alu32R(Xalu_XOR,rmi2,r));
1973 addInstr(env, X86Instr_Test32(0xFFFF,X86RM_Reg(r)));
1974 switch (e->Iex.Binop.op) {
1975 case Iop_CmpEQ16: case Iop_CasCmpEQ16:
1976 return Xcc_Z;
1977 case Iop_CmpNE16: case Iop_CasCmpNE16: case Iop_ExpCmpNE16:
1978 return Xcc_NZ;
1979 default:
1980 vpanic("iselCondCode(x86): CmpXX16");
1981 }
1982 }
1983
1984 /* CmpNE32(ccall, 32-bit constant) (--smc-check=all optimisation).
1985 Saves a "movl %eax, %tmp" compared to the default route. */
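   /* This works because the helper's I32 result comes back in %eax
      (see the RLPri_Int check below), so the immediate can be
      compared against %eax directly. */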
1986 if (e->tag == Iex_Binop
1987 && e->Iex.Binop.op == Iop_CmpNE32
1988 && e->Iex.Binop.arg1->tag == Iex_CCall
1989 && e->Iex.Binop.arg2->tag == Iex_Const) {
1990 IRExpr* cal = e->Iex.Binop.arg1;
1991 IRExpr* con = e->Iex.Binop.arg2;
1992 /* clone & partial-eval of generic Iex_CCall and Iex_Const cases */
1993 vassert(cal->Iex.CCall.retty == Ity_I32); /* else ill-typed IR */
1994 vassert(con->Iex.Const.con->tag == Ico_U32);
1995 /* Marshal args, do the call. */
1996 UInt addToSp = 0;
1997 RetLoc rloc = mk_RetLoc_INVALID();
1998 doHelperCall( &addToSp, &rloc, env, NULL/*guard*/,
1999 cal->Iex.CCall.cee,
2000 cal->Iex.CCall.retty, cal->Iex.CCall.args );
2001 vassert(is_sane_RetLoc(rloc));
2002 vassert(rloc.pri == RLPri_Int);
2003 vassert(addToSp == 0);
2004 /* */
2005 addInstr(env, X86Instr_Alu32R(Xalu_CMP,
2006 X86RMI_Imm(con->Iex.Const.con->Ico.U32),
2007 hregX86_EAX()));
2008 return Xcc_NZ;
2009 }
2010
2011 /* Cmp*32*(x,y) */
2012 if (e->tag == Iex_Binop
2013 && (e->Iex.Binop.op == Iop_CmpEQ32
2014 || e->Iex.Binop.op == Iop_CmpNE32
2015 || e->Iex.Binop.op == Iop_CmpLT32S
2016 || e->Iex.Binop.op == Iop_CmpLT32U
2017 || e->Iex.Binop.op == Iop_CmpLE32S
2018 || e->Iex.Binop.op == Iop_CmpLE32U
2019 || e->Iex.Binop.op == Iop_CasCmpEQ32
2020 || e->Iex.Binop.op == Iop_CasCmpNE32
2021 || e->Iex.Binop.op == Iop_ExpCmpNE32)) {
2022 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
2023 X86RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
2024 addInstr(env, X86Instr_Alu32R(Xalu_CMP,rmi2,r1));
2025 switch (e->Iex.Binop.op) {
2026 case Iop_CmpEQ32: case Iop_CasCmpEQ32: return Xcc_Z;
2027 case Iop_CmpNE32:
2028 case Iop_CasCmpNE32: case Iop_ExpCmpNE32: return Xcc_NZ;
2029 case Iop_CmpLT32S: return Xcc_L;
2030 case Iop_CmpLT32U: return Xcc_B;
2031 case Iop_CmpLE32S: return Xcc_LE;
2032 case Iop_CmpLE32U: return Xcc_BE;
2033 default: vpanic("iselCondCode(x86): CmpXX32");
2034 }
2035 }
2036
2037 /* CmpNE64 */
2038 if (e->tag == Iex_Binop
2039 && (e->Iex.Binop.op == Iop_CmpNE64
2040 || e->Iex.Binop.op == Iop_CmpEQ64)) {
2041 HReg hi1, hi2, lo1, lo2;
2042 HReg tHi = newVRegI(env);
2043 HReg tLo = newVRegI(env);
2044 iselInt64Expr( &hi1, &lo1, env, e->Iex.Binop.arg1 );
2045 iselInt64Expr( &hi2, &lo2, env, e->Iex.Binop.arg2 );
2046 addInstr(env, mk_iMOVsd_RR(hi1, tHi));
2047 addInstr(env, X86Instr_Alu32R(Xalu_XOR,X86RMI_Reg(hi2), tHi));
2048 addInstr(env, mk_iMOVsd_RR(lo1, tLo));
2049 addInstr(env, X86Instr_Alu32R(Xalu_XOR,X86RMI_Reg(lo2), tLo));
2050 addInstr(env, X86Instr_Alu32R(Xalu_OR,X86RMI_Reg(tHi), tLo));
2051 switch (e->Iex.Binop.op) {
2052 case Iop_CmpNE64: return Xcc_NZ;
2053 case Iop_CmpEQ64: return Xcc_Z;
2054 default: vpanic("iselCondCode(x86): CmpXX64");
2055 }
2056 }
2057
2058 ppIRExpr(e);
2059 vpanic("iselCondCode");
2060 }
2061
2062
2063 /*---------------------------------------------------------*/
2064 /*--- ISEL: Integer expressions (64 bit) ---*/
2065 /*---------------------------------------------------------*/
2066
2067 /* Compute a 64-bit value into a register pair, which is returned as
2068 the first two parameters. As with iselIntExpr_R, these may be
2069 either real or virtual regs; in any case they must not be changed
2070 by subsequent code emitted by the caller. */
2071
2072 static void iselInt64Expr ( HReg* rHi, HReg* rLo, ISelEnv* env, IRExpr* e )
2073 {
2074 iselInt64Expr_wrk(rHi, rLo, env, e);
2075 # if 0
2076 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
2077 # endif
2078 vassert(hregClass(*rHi) == HRcInt32);
2079 vassert(hregIsVirtual(*rHi));
2080 vassert(hregClass(*rLo) == HRcInt32);
2081 vassert(hregIsVirtual(*rLo));
2082 }
2083
2084 /* DO NOT CALL THIS DIRECTLY ! */
2085 static void iselInt64Expr_wrk ( HReg* rHi, HReg* rLo, ISelEnv* env, IRExpr* e )
2086 {
2087 MatchInfo mi;
2088 HWord fn = 0; /* helper fn for most SIMD64 stuff */
2089 vassert(e);
2090 vassert(typeOfIRExpr(env->type_env,e) == Ity_I64);
2091
2092 /* 64-bit literal */
2093 if (e->tag == Iex_Const) {
2094 ULong w64 = e->Iex.Const.con->Ico.U64;
2095 UInt wHi = toUInt(w64 >> 32);
2096 UInt wLo = toUInt(w64);
2097 HReg tLo = newVRegI(env);
2098 HReg tHi = newVRegI(env);
2099 vassert(e->Iex.Const.con->tag == Ico_U64);
2100 if (wLo == wHi) {
2101 /* Save a precious Int register in this special case. */
2102 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(wLo), tLo));
2103 *rHi = tLo;
2104 *rLo = tLo;
2105 } else {
2106 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(wHi), tHi));
2107 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(wLo), tLo));
2108 *rHi = tHi;
2109 *rLo = tLo;
2110 }
2111 return;
2112 }
2113
2114 /* read 64-bit IRTemp */
2115 if (e->tag == Iex_RdTmp) {
2116 lookupIRTemp64( rHi, rLo, env, e->Iex.RdTmp.tmp);
2117 return;
2118 }
2119
2120 /* 64-bit load */
2121 if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
2122 HReg tLo, tHi;
2123 X86AMode *am0, *am4;
2124 vassert(e->Iex.Load.ty == Ity_I64);
2125 tLo = newVRegI(env);
2126 tHi = newVRegI(env);
2127 am0 = iselIntExpr_AMode(env, e->Iex.Load.addr);
2128 am4 = advance4(am0);
2129 addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am0), tLo ));
2130 addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am4), tHi ));
2131 *rHi = tHi;
2132 *rLo = tLo;
2133 return;
2134 }
2135
2136 /* 64-bit GET */
2137 if (e->tag == Iex_Get) {
2138 X86AMode* am = X86AMode_IR(e->Iex.Get.offset, hregX86_EBP());
2139 X86AMode* am4 = advance4(am);
2140 HReg tLo = newVRegI(env);
2141 HReg tHi = newVRegI(env);
2142 addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am), tLo ));
2143 addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am4), tHi ));
2144 *rHi = tHi;
2145 *rLo = tLo;
2146 return;
2147 }
2148
2149 /* 64-bit GETI */
2150 if (e->tag == Iex_GetI) {
2151 X86AMode* am
2152 = genGuestArrayOffset( env, e->Iex.GetI.descr,
2153 e->Iex.GetI.ix, e->Iex.GetI.bias );
2154 X86AMode* am4 = advance4(am);
2155 HReg tLo = newVRegI(env);
2156 HReg tHi = newVRegI(env);
2157 addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am), tLo ));
2158 addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am4), tHi ));
2159 *rHi = tHi;
2160 *rLo = tLo;
2161 return;
2162 }
2163
2164 /* 64-bit ITE: ITE(g, expr, expr) */ // VFD
2165 if (e->tag == Iex_ITE) {
2166 HReg e0Lo, e0Hi, e1Lo, e1Hi;
2167 HReg tLo = newVRegI(env);
2168 HReg tHi = newVRegI(env);
2169 iselInt64Expr(&e0Hi, &e0Lo, env, e->Iex.ITE.iffalse);
2170 iselInt64Expr(&e1Hi, &e1Lo, env, e->Iex.ITE.iftrue);
2171 addInstr(env, mk_iMOVsd_RR(e1Hi, tHi));
2172 addInstr(env, mk_iMOVsd_RR(e1Lo, tLo));
2173 X86CondCode cc = iselCondCode(env, e->Iex.ITE.cond);
2174 /* This assumes the first cmov32 doesn't trash the condition
2175 codes, so they are still available for the second cmov32 */
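      /* (CMOVcc only reads EFLAGS; it never writes them, so the
         assumption holds.) */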
2176 addInstr(env, X86Instr_CMov32(cc ^ 1, X86RM_Reg(e0Hi), tHi));
2177 addInstr(env, X86Instr_CMov32(cc ^ 1, X86RM_Reg(e0Lo), tLo));
2178 *rHi = tHi;
2179 *rLo = tLo;
2180 return;
2181 }
2182
2183 /* --------- BINARY ops --------- */
2184 if (e->tag == Iex_Binop) {
2185 switch (e->Iex.Binop.op) {
2186 /* 32 x 32 -> 64 multiply */
2187 case Iop_MullU32:
2188 case Iop_MullS32: {
2189          /* get one operand into %eax, and the other into a R/M.
2190             Need to make an educated guess about which operand is
2191             better placed in which. */
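         /* On x86, one-operand MUL/IMUL implicitly multiplies by %eax
            and leaves the 64-bit product in %edx:%eax; hence one operand
            is forced into %eax here and the result is copied out of
            %edx:%eax below. */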
2192 HReg tLo = newVRegI(env);
2193 HReg tHi = newVRegI(env);
2194 Bool syned = toBool(e->Iex.Binop.op == Iop_MullS32);
2195 X86RM* rmLeft = iselIntExpr_RM(env, e->Iex.Binop.arg1);
2196 HReg rRight = iselIntExpr_R(env, e->Iex.Binop.arg2);
2197 addInstr(env, mk_iMOVsd_RR(rRight, hregX86_EAX()));
2198 addInstr(env, X86Instr_MulL(syned, rmLeft));
2199 /* Result is now in EDX:EAX. Tell the caller. */
2200 addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi));
2201 addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo));
2202 *rHi = tHi;
2203 *rLo = tLo;
2204 return;
2205 }
2206
2207 /* 64 x 32 -> (32(rem),32(div)) division */
2208 case Iop_DivModU64to32:
2209 case Iop_DivModS64to32: {
2210 /* Get the 64-bit operand into edx:eax, and the other into
2211 any old R/M. */
2212 HReg sHi, sLo;
2213 HReg tLo = newVRegI(env);
2214 HReg tHi = newVRegI(env);
2215 Bool syned = toBool(e->Iex.Binop.op == Iop_DivModS64to32);
2216 X86RM* rmRight = iselIntExpr_RM(env, e->Iex.Binop.arg2);
2217 iselInt64Expr(&sHi,&sLo, env, e->Iex.Binop.arg1);
2218 addInstr(env, mk_iMOVsd_RR(sHi, hregX86_EDX()));
2219 addInstr(env, mk_iMOVsd_RR(sLo, hregX86_EAX()));
2220 addInstr(env, X86Instr_Div(syned, rmRight));
2221 addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi));
2222 addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo));
2223 *rHi = tHi;
2224 *rLo = tLo;
2225 return;
2226 }
2227
2228 /* Or64/And64/Xor64 */
2229 case Iop_Or64:
2230 case Iop_And64:
2231 case Iop_Xor64: {
2232 HReg xLo, xHi, yLo, yHi;
2233 HReg tLo = newVRegI(env);
2234 HReg tHi = newVRegI(env);
2235 X86AluOp op = e->Iex.Binop.op==Iop_Or64 ? Xalu_OR
2236 : e->Iex.Binop.op==Iop_And64 ? Xalu_AND
2237 : Xalu_XOR;
2238 iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
2239 iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2);
2240 addInstr(env, mk_iMOVsd_RR(xHi, tHi));
2241 addInstr(env, X86Instr_Alu32R(op, X86RMI_Reg(yHi), tHi));
2242 addInstr(env, mk_iMOVsd_RR(xLo, tLo));
2243 addInstr(env, X86Instr_Alu32R(op, X86RMI_Reg(yLo), tLo));
2244 *rHi = tHi;
2245 *rLo = tLo;
2246 return;
2247 }
2248
2249 /* Add64/Sub64 */
2250 case Iop_Add64:
2251 if (e->Iex.Binop.arg2->tag == Iex_Const) {
2252 /* special case Add64(e, const) */
2253 ULong w64 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U64;
2254 UInt wHi = toUInt(w64 >> 32);
2255 UInt wLo = toUInt(w64);
2256 HReg tLo = newVRegI(env);
2257 HReg tHi = newVRegI(env);
2258 HReg xLo, xHi;
2259 vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U64);
2260 iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
2261 addInstr(env, mk_iMOVsd_RR(xHi, tHi));
2262 addInstr(env, mk_iMOVsd_RR(xLo, tLo));
2263 addInstr(env, X86Instr_Alu32R(Xalu_ADD, X86RMI_Imm(wLo), tLo));
2264 addInstr(env, X86Instr_Alu32R(Xalu_ADC, X86RMI_Imm(wHi), tHi));
2265 *rHi = tHi;
2266 *rLo = tLo;
2267 return;
2268 }
2269 /* else fall through to the generic case */
2270 case Iop_Sub64: {
2271 HReg xLo, xHi, yLo, yHi;
2272 HReg tLo = newVRegI(env);
2273 HReg tHi = newVRegI(env);
2274 iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
2275 addInstr(env, mk_iMOVsd_RR(xHi, tHi));
2276 addInstr(env, mk_iMOVsd_RR(xLo, tLo));
2277 iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2);
2278 if (e->Iex.Binop.op==Iop_Add64) {
2279 addInstr(env, X86Instr_Alu32R(Xalu_ADD, X86RMI_Reg(yLo), tLo));
2280 addInstr(env, X86Instr_Alu32R(Xalu_ADC, X86RMI_Reg(yHi), tHi));
2281 } else {
2282 addInstr(env, X86Instr_Alu32R(Xalu_SUB, X86RMI_Reg(yLo), tLo));
2283 addInstr(env, X86Instr_Alu32R(Xalu_SBB, X86RMI_Reg(yHi), tHi));
2284 }
2285 *rHi = tHi;
2286 *rLo = tLo;
2287 return;
2288 }
2289
2290 /* 32HLto64(e1,e2) */
2291 case Iop_32HLto64:
2292 *rHi = iselIntExpr_R(env, e->Iex.Binop.arg1);
2293 *rLo = iselIntExpr_R(env, e->Iex.Binop.arg2);
2294 return;
2295
2296 /* 64-bit shifts */
2297 case Iop_Shl64: {
2298 /* We use the same ingenious scheme as gcc. Put the value
2299 to be shifted into %hi:%lo, and the shift amount into
2300 %cl. Then (dsts on right, a la ATT syntax):
2301
2302 shldl %cl, %lo, %hi -- make %hi be right for the
2303 -- shift amt %cl % 32
2304 shll %cl, %lo -- make %lo be right for the
2305 -- shift amt %cl % 32
2306
2307 Now, if (shift amount % 64) is in the range 32 .. 63,
2308 we have to do a fixup, which puts the result low half
2309 into the result high half, and zeroes the low half:
2310
2311 testl $32, %ecx
2312
2313 cmovnz %lo, %hi
2314 movl $0, %tmp -- sigh; need yet another reg
2315 cmovnz %tmp, %lo
2316 */
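         /* Example: for a shift amount of 40, %cl % 32 == 8, so after
            the shld/shl pair %hi = (hi << 8) | (lo >> 24) and
            %lo = lo << 8.  Since bit 5 of %ecx is set, the fixup then
            moves %lo into %hi and zeroes %lo, giving hi' = lo << 8,
            lo' = 0, which is indeed the 64-bit value shifted left
            by 40. */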
2317 HReg rAmt, sHi, sLo, tHi, tLo, tTemp;
2318 tLo = newVRegI(env);
2319 tHi = newVRegI(env);
2320 tTemp = newVRegI(env);
2321 rAmt = iselIntExpr_R(env, e->Iex.Binop.arg2);
2322 iselInt64Expr(&sHi,&sLo, env, e->Iex.Binop.arg1);
2323 addInstr(env, mk_iMOVsd_RR(rAmt, hregX86_ECX()));
2324 addInstr(env, mk_iMOVsd_RR(sHi, tHi));
2325 addInstr(env, mk_iMOVsd_RR(sLo, tLo));
2326 /* Ok. Now shift amt is in %ecx, and value is in tHi/tLo
2327 and those regs are legitimately modifiable. */
2328 addInstr(env, X86Instr_Sh3232(Xsh_SHL, 0/*%cl*/, tLo, tHi));
2329 addInstr(env, X86Instr_Sh32(Xsh_SHL, 0/*%cl*/, tLo));
2330 addInstr(env, X86Instr_Test32(32, X86RM_Reg(hregX86_ECX())));
2331 addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(tLo), tHi));
2332 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tTemp));
2333 addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(tTemp), tLo));
2334 *rHi = tHi;
2335 *rLo = tLo;
2336 return;
2337 }
2338
2339 case Iop_Shr64: {
2340 /* We use the same ingenious scheme as gcc. Put the value
2341 to be shifted into %hi:%lo, and the shift amount into
2342 %cl. Then:
2343
2344 shrdl %cl, %hi, %lo -- make %lo be right for the
2345 -- shift amt %cl % 32
2346 shrl %cl, %hi -- make %hi be right for the
2347 -- shift amt %cl % 32
2348
2349 Now, if (shift amount % 64) is in the range 32 .. 63,
2350 we have to do a fixup, which puts the result high half
2351 into the result low half, and zeroes the high half:
2352
2353 testl $32, %ecx
2354
2355 cmovnz %hi, %lo
2356 movl $0, %tmp -- sigh; need yet another reg
2357 cmovnz %tmp, %hi
2358 */
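         /* This mirrors the Shl64 case above; e.g. for a shift amount
            of 40, the shrd/shr pair produces %lo = (lo >> 8) | (hi << 24)
            and %hi = hi >> 8, and the fixup then moves %hi into %lo and
            zeroes %hi, giving lo' = hi >> 8, hi' = 0. */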
2359 HReg rAmt, sHi, sLo, tHi, tLo, tTemp;
2360 tLo = newVRegI(env);
2361 tHi = newVRegI(env);
2362 tTemp = newVRegI(env);
2363 rAmt = iselIntExpr_R(env, e->Iex.Binop.arg2);
2364 iselInt64Expr(&sHi,&sLo, env, e->Iex.Binop.arg1);
2365 addInstr(env, mk_iMOVsd_RR(rAmt, hregX86_ECX()));
2366 addInstr(env, mk_iMOVsd_RR(sHi, tHi));
2367 addInstr(env, mk_iMOVsd_RR(sLo, tLo));
2368 /* Ok. Now shift amt is in %ecx, and value is in tHi/tLo
2369 and those regs are legitimately modifiable. */
2370 addInstr(env, X86Instr_Sh3232(Xsh_SHR, 0/*%cl*/, tHi, tLo));
2371 addInstr(env, X86Instr_Sh32(Xsh_SHR, 0/*%cl*/, tHi));
2372 addInstr(env, X86Instr_Test32(32, X86RM_Reg(hregX86_ECX())));
2373 addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(tHi), tLo));
2374 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tTemp));
2375 addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(tTemp), tHi));
2376 *rHi = tHi;
2377 *rLo = tLo;
2378 return;
2379 }
2380
2381 /* F64 -> I64 */
2382 /* Sigh, this is an almost exact copy of the F64 -> I32/I16
2383 case. Unfortunately I see no easy way to avoid the
2384 duplication. */
2385 case Iop_F64toI64S: {
2386 HReg rf = iselDblExpr(env, e->Iex.Binop.arg2);
2387 HReg tLo = newVRegI(env);
2388 HReg tHi = newVRegI(env);
2389
2390 /* Used several times ... */
2391 /* Careful ... this sharing is only safe because
2392 zero_esp/four_esp do not hold any registers which the
2393 register allocator could attempt to swizzle later. */
2394 X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
2395 X86AMode* four_esp = X86AMode_IR(4, hregX86_ESP());
2396
2397          /* rf now holds the value to be converted, and arg1 holds
2398             the rounding mode value, encoded as per the
2399             IRRoundingMode enum.  The first thing to do is set the
2400             FPU's rounding mode accordingly. */
2401
2402 /* Create a space for the format conversion. */
2403 /* subl $8, %esp */
2404 sub_from_esp(env, 8);
2405
2406 /* Set host rounding mode */
2407 set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
2408
2409 /* gistll %rf, 0(%esp) */
2410 addInstr(env, X86Instr_FpLdStI(False/*store*/, 8, rf, zero_esp));
2411
2412 /* movl 0(%esp), %dstLo */
2413 /* movl 4(%esp), %dstHi */
2414 addInstr(env, X86Instr_Alu32R(
2415 Xalu_MOV, X86RMI_Mem(zero_esp), tLo));
2416 addInstr(env, X86Instr_Alu32R(
2417 Xalu_MOV, X86RMI_Mem(four_esp), tHi));
2418
2419 /* Restore default FPU rounding. */
2420 set_FPU_rounding_default( env );
2421
2422 /* addl $8, %esp */
2423 add_to_esp(env, 8);
2424
2425 *rHi = tHi;
2426 *rLo = tLo;
2427 return;
2428 }
2429
2430 case Iop_Add8x8:
2431 fn = (HWord)h_generic_calc_Add8x8; goto binnish;
2432 case Iop_Add16x4:
2433 fn = (HWord)h_generic_calc_Add16x4; goto binnish;
2434 case Iop_Add32x2:
2435 fn = (HWord)h_generic_calc_Add32x2; goto binnish;
2436
2437 case Iop_Avg8Ux8:
2438 fn = (HWord)h_generic_calc_Avg8Ux8; goto binnish;
2439 case Iop_Avg16Ux4:
2440 fn = (HWord)h_generic_calc_Avg16Ux4; goto binnish;
2441
2442 case Iop_CmpEQ8x8:
2443 fn = (HWord)h_generic_calc_CmpEQ8x8; goto binnish;
2444 case Iop_CmpEQ16x4:
2445 fn = (HWord)h_generic_calc_CmpEQ16x4; goto binnish;
2446 case Iop_CmpEQ32x2:
2447 fn = (HWord)h_generic_calc_CmpEQ32x2; goto binnish;
2448
2449 case Iop_CmpGT8Sx8:
2450 fn = (HWord)h_generic_calc_CmpGT8Sx8; goto binnish;
2451 case Iop_CmpGT16Sx4:
2452 fn = (HWord)h_generic_calc_CmpGT16Sx4; goto binnish;
2453 case Iop_CmpGT32Sx2:
2454 fn = (HWord)h_generic_calc_CmpGT32Sx2; goto binnish;
2455
2456 case Iop_InterleaveHI8x8:
2457 fn = (HWord)h_generic_calc_InterleaveHI8x8; goto binnish;
2458 case Iop_InterleaveLO8x8:
2459 fn = (HWord)h_generic_calc_InterleaveLO8x8; goto binnish;
2460 case Iop_InterleaveHI16x4:
2461 fn = (HWord)h_generic_calc_InterleaveHI16x4; goto binnish;
2462 case Iop_InterleaveLO16x4:
2463 fn = (HWord)h_generic_calc_InterleaveLO16x4; goto binnish;
2464 case Iop_InterleaveHI32x2:
2465 fn = (HWord)h_generic_calc_InterleaveHI32x2; goto binnish;
2466 case Iop_InterleaveLO32x2:
2467 fn = (HWord)h_generic_calc_InterleaveLO32x2; goto binnish;
2468 case Iop_CatOddLanes16x4:
2469 fn = (HWord)h_generic_calc_CatOddLanes16x4; goto binnish;
2470 case Iop_CatEvenLanes16x4:
2471 fn = (HWord)h_generic_calc_CatEvenLanes16x4; goto binnish;
2472 case Iop_Perm8x8:
2473 fn = (HWord)h_generic_calc_Perm8x8; goto binnish;
2474
2475 case Iop_Max8Ux8:
2476 fn = (HWord)h_generic_calc_Max8Ux8; goto binnish;
2477 case Iop_Max16Sx4:
2478 fn = (HWord)h_generic_calc_Max16Sx4; goto binnish;
2479 case Iop_Min8Ux8:
2480 fn = (HWord)h_generic_calc_Min8Ux8; goto binnish;
2481 case Iop_Min16Sx4:
2482 fn = (HWord)h_generic_calc_Min16Sx4; goto binnish;
2483
2484 case Iop_Mul16x4:
2485 fn = (HWord)h_generic_calc_Mul16x4; goto binnish;
2486 case Iop_Mul32x2:
2487 fn = (HWord)h_generic_calc_Mul32x2; goto binnish;
2488 case Iop_MulHi16Sx4:
2489 fn = (HWord)h_generic_calc_MulHi16Sx4; goto binnish;
2490 case Iop_MulHi16Ux4:
2491 fn = (HWord)h_generic_calc_MulHi16Ux4; goto binnish;
2492
2493 case Iop_QAdd8Sx8:
2494 fn = (HWord)h_generic_calc_QAdd8Sx8; goto binnish;
2495 case Iop_QAdd16Sx4:
2496 fn = (HWord)h_generic_calc_QAdd16Sx4; goto binnish;
2497 case Iop_QAdd8Ux8:
2498 fn = (HWord)h_generic_calc_QAdd8Ux8; goto binnish;
2499 case Iop_QAdd16Ux4:
2500 fn = (HWord)h_generic_calc_QAdd16Ux4; goto binnish;
2501
2502 case Iop_QNarrowBin32Sto16Sx4:
2503 fn = (HWord)h_generic_calc_QNarrowBin32Sto16Sx4; goto binnish;
2504 case Iop_QNarrowBin16Sto8Sx8:
2505 fn = (HWord)h_generic_calc_QNarrowBin16Sto8Sx8; goto binnish;
2506 case Iop_QNarrowBin16Sto8Ux8:
2507 fn = (HWord)h_generic_calc_QNarrowBin16Sto8Ux8; goto binnish;
2508 case Iop_NarrowBin16to8x8:
2509 fn = (HWord)h_generic_calc_NarrowBin16to8x8; goto binnish;
2510 case Iop_NarrowBin32to16x4:
2511 fn = (HWord)h_generic_calc_NarrowBin32to16x4; goto binnish;
2512
2513 case Iop_QSub8Sx8:
2514 fn = (HWord)h_generic_calc_QSub8Sx8; goto binnish;
2515 case Iop_QSub16Sx4:
2516 fn = (HWord)h_generic_calc_QSub16Sx4; goto binnish;
2517 case Iop_QSub8Ux8:
2518 fn = (HWord)h_generic_calc_QSub8Ux8; goto binnish;
2519 case Iop_QSub16Ux4:
2520 fn = (HWord)h_generic_calc_QSub16Ux4; goto binnish;
2521
2522 case Iop_Sub8x8:
2523 fn = (HWord)h_generic_calc_Sub8x8; goto binnish;
2524 case Iop_Sub16x4:
2525 fn = (HWord)h_generic_calc_Sub16x4; goto binnish;
2526 case Iop_Sub32x2:
2527 fn = (HWord)h_generic_calc_Sub32x2; goto binnish;
2528
2529 binnish: {
2530 /* Note: the following assumes all helpers are of
2531 signature
2532 ULong fn ( ULong, ULong ), and they are
2533 not marked as regparm functions.
2534 */
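         /* The pushes below lay out the two ULong args in cdecl order:
            xLo ends up at the lowest address, then xHi, yLo, yHi.  The
            helper's ULong result comes back in %edx:%eax, and the 16
            bytes of arguments are popped afterwards with
            add_to_esp(env, 4*4). */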
2535 HReg xLo, xHi, yLo, yHi;
2536 HReg tLo = newVRegI(env);
2537 HReg tHi = newVRegI(env);
2538 iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2);
2539 addInstr(env, X86Instr_Push(X86RMI_Reg(yHi)));
2540 addInstr(env, X86Instr_Push(X86RMI_Reg(yLo)));
2541 iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
2542 addInstr(env, X86Instr_Push(X86RMI_Reg(xHi)));
2543 addInstr(env, X86Instr_Push(X86RMI_Reg(xLo)));
2544 addInstr(env, X86Instr_Call( Xcc_ALWAYS, (Addr32)fn,
2545 0, mk_RetLoc_simple(RLPri_2Int) ));
2546 add_to_esp(env, 4*4);
2547 addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi));
2548 addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo));
2549 *rHi = tHi;
2550 *rLo = tLo;
2551 return;
2552 }
2553
2554 case Iop_ShlN32x2:
2555 fn = (HWord)h_generic_calc_ShlN32x2; goto shifty;
2556 case Iop_ShlN16x4:
2557 fn = (HWord)h_generic_calc_ShlN16x4; goto shifty;
2558 case Iop_ShlN8x8:
2559 fn = (HWord)h_generic_calc_ShlN8x8; goto shifty;
2560 case Iop_ShrN32x2:
2561 fn = (HWord)h_generic_calc_ShrN32x2; goto shifty;
2562 case Iop_ShrN16x4:
2563 fn = (HWord)h_generic_calc_ShrN16x4; goto shifty;
2564 case Iop_SarN32x2:
2565 fn = (HWord)h_generic_calc_SarN32x2; goto shifty;
2566 case Iop_SarN16x4:
2567 fn = (HWord)h_generic_calc_SarN16x4; goto shifty;
2568 case Iop_SarN8x8:
2569 fn = (HWord)h_generic_calc_SarN8x8; goto shifty;
2570 shifty: {
2571 /* Note: the following assumes all helpers are of
2572 signature
2573 ULong fn ( ULong, UInt ), and they are
2574 not marked as regparm functions.
2575 */
2576 HReg xLo, xHi;
2577 HReg tLo = newVRegI(env);
2578 HReg tHi = newVRegI(env);
2579 X86RMI* y = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
2580 addInstr(env, X86Instr_Push(y));
2581 iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
2582 addInstr(env, X86Instr_Push(X86RMI_Reg(xHi)));
2583 addInstr(env, X86Instr_Push(X86RMI_Reg(xLo)));
2584 addInstr(env, X86Instr_Call( Xcc_ALWAYS, (Addr32)fn,
2585 0, mk_RetLoc_simple(RLPri_2Int) ));
2586 add_to_esp(env, 3*4);
2587 addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi));
2588 addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo));
2589 *rHi = tHi;
2590 *rLo = tLo;
2591 return;
2592 }
2593
2594 default:
2595 break;
2596 }
2597 } /* if (e->tag == Iex_Binop) */
2598
2599
2600 /* --------- UNARY ops --------- */
2601 if (e->tag == Iex_Unop) {
2602 switch (e->Iex.Unop.op) {
2603
2604 /* 32Sto64(e) */
2605 case Iop_32Sto64: {
2606 HReg tLo = newVRegI(env);
2607 HReg tHi = newVRegI(env);
2608 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
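         /* Copy src into both halves; the arithmetic shift by 31 then
            turns the high half into 32 copies of src's sign bit
            (0x00000000 or 0xFFFFFFFF), i.e. the sign extension. */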
2609 addInstr(env, mk_iMOVsd_RR(src,tHi));
2610 addInstr(env, mk_iMOVsd_RR(src,tLo));
2611 addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, tHi));
2612 *rHi = tHi;
2613 *rLo = tLo;
2614 return;
2615 }
2616
2617 /* 32Uto64(e) */
2618 case Iop_32Uto64: {
2619 HReg tLo = newVRegI(env);
2620 HReg tHi = newVRegI(env);
2621 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
2622 addInstr(env, mk_iMOVsd_RR(src,tLo));
2623 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tHi));
2624 *rHi = tHi;
2625 *rLo = tLo;
2626 return;
2627 }
2628
2629 /* 16Uto64(e) */
2630 case Iop_16Uto64: {
2631 HReg tLo = newVRegI(env);
2632 HReg tHi = newVRegI(env);
2633 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
2634 addInstr(env, mk_iMOVsd_RR(src,tLo));
2635 addInstr(env, X86Instr_Alu32R(Xalu_AND,
2636 X86RMI_Imm(0xFFFF), tLo));
2637 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tHi));
2638 *rHi = tHi;
2639 *rLo = tLo;
2640 return;
2641 }
2642
2643 /* V128{HI}to64 */
2644 case Iop_V128HIto64:
2645 case Iop_V128to64: {
2646 Int off = e->Iex.Unop.op==Iop_V128HIto64 ? 8 : 0;
2647 HReg tLo = newVRegI(env);
2648 HReg tHi = newVRegI(env);
2649 HReg vec = iselVecExpr(env, e->Iex.Unop.arg);
2650 X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
2651 X86AMode* espLO = X86AMode_IR(off, hregX86_ESP());
2652 X86AMode* espHI = X86AMode_IR(off+4, hregX86_ESP());
2653 sub_from_esp(env, 16);
2654 addInstr(env, X86Instr_SseLdSt(False/*store*/, vec, esp0));
2655 addInstr(env, X86Instr_Alu32R( Xalu_MOV,
2656 X86RMI_Mem(espLO), tLo ));
2657 addInstr(env, X86Instr_Alu32R( Xalu_MOV,
2658 X86RMI_Mem(espHI), tHi ));
2659 add_to_esp(env, 16);
2660 *rHi = tHi;
2661 *rLo = tLo;
2662 return;
2663 }
2664
2665 /* could do better than this, but for now ... */
2666 case Iop_1Sto64: {
2667 HReg tLo = newVRegI(env);
2668 HReg tHi = newVRegI(env);
2669 X86CondCode cond = iselCondCode(env, e->Iex.Unop.arg);
2670 addInstr(env, X86Instr_Set32(cond,tLo));
2671 addInstr(env, X86Instr_Sh32(Xsh_SHL, 31, tLo));
2672 addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, tLo));
2673 addInstr(env, mk_iMOVsd_RR(tLo, tHi));
2674 *rHi = tHi;
2675 *rLo = tLo;
2676 return;
2677 }
2678
2679 /* Not64(e) */
2680 case Iop_Not64: {
2681 HReg tLo = newVRegI(env);
2682 HReg tHi = newVRegI(env);
2683 HReg sHi, sLo;
2684 iselInt64Expr(&sHi, &sLo, env, e->Iex.Unop.arg);
2685 addInstr(env, mk_iMOVsd_RR(sHi, tHi));
2686 addInstr(env, mk_iMOVsd_RR(sLo, tLo));
2687 addInstr(env, X86Instr_Unary32(Xun_NOT,tHi));
2688 addInstr(env, X86Instr_Unary32(Xun_NOT,tLo));
2689 *rHi = tHi;
2690 *rLo = tLo;
2691 return;
2692 }
2693
2694 /* Left64(e) */
2695 case Iop_Left64: {
2696 HReg yLo, yHi;
2697 HReg tLo = newVRegI(env);
2698 HReg tHi = newVRegI(env);
2699 /* yHi:yLo = arg */
2700 iselInt64Expr(&yHi, &yLo, env, e->Iex.Unop.arg);
2701 /* tLo = 0 - yLo, and set carry */
2702 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tLo));
2703 addInstr(env, X86Instr_Alu32R(Xalu_SUB, X86RMI_Reg(yLo), tLo));
2704 /* tHi = 0 - yHi - carry */
2705 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tHi));
2706 addInstr(env, X86Instr_Alu32R(Xalu_SBB, X86RMI_Reg(yHi), tHi));
2707 /* So now we have tHi:tLo = -arg. To finish off, or 'arg'
2708 back in, so as to give the final result
2709 tHi:tLo = arg | -arg. */
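         /* arg | -arg sets every bit at or above arg's lowest set bit;
            e.g. arg = 0x...0100 gives 0xFFFF...FF00 -- the lowest set
            bit is smeared leftwards, which is what Left64 computes. */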
2710 addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Reg(yLo), tLo));
2711 addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Reg(yHi), tHi));
2712 *rHi = tHi;
2713 *rLo = tLo;
2714 return;
2715 }
2716
2717 /* --- patterns rooted at: CmpwNEZ64 --- */
2718
2719 /* CmpwNEZ64(e) */
2720 case Iop_CmpwNEZ64: {
2721
2722 DECLARE_PATTERN(p_CmpwNEZ64_Or64);
2723 DEFINE_PATTERN(p_CmpwNEZ64_Or64,
2724 unop(Iop_CmpwNEZ64,binop(Iop_Or64,bind(0),bind(1))));
2725 if (matchIRExpr(&mi, p_CmpwNEZ64_Or64, e)) {
2726 /* CmpwNEZ64(Or64(x,y)) */
2727 HReg xHi,xLo,yHi,yLo;
2728 HReg xBoth = newVRegI(env);
2729 HReg merged = newVRegI(env);
2730 HReg tmp2 = newVRegI(env);
2731
2732 iselInt64Expr(&xHi,&xLo, env, mi.bindee[0]);
2733 addInstr(env, mk_iMOVsd_RR(xHi,xBoth));
2734 addInstr(env, X86Instr_Alu32R(Xalu_OR,
2735 X86RMI_Reg(xLo),xBoth));
2736
2737 iselInt64Expr(&yHi,&yLo, env, mi.bindee[1]);
2738 addInstr(env, mk_iMOVsd_RR(yHi,merged));
2739 addInstr(env, X86Instr_Alu32R(Xalu_OR,
2740 X86RMI_Reg(yLo),merged));
2741 addInstr(env, X86Instr_Alu32R(Xalu_OR,
2742 X86RMI_Reg(xBoth),merged));
2743
2744 /* tmp2 = (merged | -merged) >>s 31 */
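            /* (merged | -merged) has its sign bit set iff merged != 0,
               so the arithmetic shift by 31 yields all-ones for a
               nonzero source and all-zeros otherwise. */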
2745 addInstr(env, mk_iMOVsd_RR(merged,tmp2));
2746 addInstr(env, X86Instr_Unary32(Xun_NEG,tmp2));
2747 addInstr(env, X86Instr_Alu32R(Xalu_OR,
2748 X86RMI_Reg(merged), tmp2));
2749 addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, tmp2));
2750 *rHi = tmp2;
2751 *rLo = tmp2;
2752 return;
2753 } else {
2754 /* CmpwNEZ64(e) */
2755 HReg srcLo, srcHi;
2756 HReg tmp1 = newVRegI(env);
2757 HReg tmp2 = newVRegI(env);
2758 /* srcHi:srcLo = arg */
2759 iselInt64Expr(&srcHi, &srcLo, env, e->Iex.Unop.arg);
2760 /* tmp1 = srcHi | srcLo */
2761 addInstr(env, mk_iMOVsd_RR(srcHi,tmp1));
2762 addInstr(env, X86Instr_Alu32R(Xalu_OR,
2763 X86RMI_Reg(srcLo), tmp1));
2764 /* tmp2 = (tmp1 | -tmp1) >>s 31 */
2765 addInstr(env, mk_iMOVsd_RR(tmp1,tmp2));
2766 addInstr(env, X86Instr_Unary32(Xun_NEG,tmp2));
2767 addInstr(env, X86Instr_Alu32R(Xalu_OR,
2768 X86RMI_Reg(tmp1), tmp2));
2769 addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, tmp2));
2770 *rHi = tmp2;
2771 *rLo = tmp2;
2772 return;
2773 }
2774 }
2775
2776 /* ReinterpF64asI64(e) */
2777 /* Given an IEEE754 double, produce an I64 with the same bit
2778 pattern. */
2779 case Iop_ReinterpF64asI64: {
2780 HReg rf = iselDblExpr(env, e->Iex.Unop.arg);
2781 HReg tLo = newVRegI(env);
2782 HReg tHi = newVRegI(env);
2783 X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
2784 X86AMode* four_esp = X86AMode_IR(4, hregX86_ESP());
2785 /* paranoia */
2786 set_FPU_rounding_default(env);
2787 /* subl $8, %esp */
2788 sub_from_esp(env, 8);
2789 /* gstD %rf, 0(%esp) */
2790 addInstr(env,
2791 X86Instr_FpLdSt(False/*store*/, 8, rf, zero_esp));
2792 /* movl 0(%esp), %tLo */
2793 addInstr(env,
2794 X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(zero_esp), tLo));
2795 /* movl 4(%esp), %tHi */
2796 addInstr(env,
2797 X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(four_esp), tHi));
2798 /* addl $8, %esp */
2799 add_to_esp(env, 8);
2800 *rHi = tHi;
2801 *rLo = tLo;
2802 return;
2803 }
2804
2805 case Iop_CmpNEZ32x2:
2806 fn = (HWord)h_generic_calc_CmpNEZ32x2; goto unish;
2807 case Iop_CmpNEZ16x4:
2808 fn = (HWord)h_generic_calc_CmpNEZ16x4; goto unish;
2809 case Iop_CmpNEZ8x8:
2810 fn = (HWord)h_generic_calc_CmpNEZ8x8; goto unish;
2811 unish: {
2812 /* Note: the following assumes all helpers are of
2813 signature
2814 ULong fn ( ULong ), and they are
2815 not marked as regparm functions.
2816 */
2817 HReg xLo, xHi;
2818 HReg tLo = newVRegI(env);
2819 HReg tHi = newVRegI(env);
2820 iselInt64Expr(&xHi, &xLo, env, e->Iex.Unop.arg);
2821 addInstr(env, X86Instr_Push(X86RMI_Reg(xHi)));
2822 addInstr(env, X86Instr_Push(X86RMI_Reg(xLo)));
2823 addInstr(env, X86Instr_Call( Xcc_ALWAYS, (Addr32)fn,
2824 0, mk_RetLoc_simple(RLPri_2Int) ));
2825 add_to_esp(env, 2*4);
2826 addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi));
2827 addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo));
2828 *rHi = tHi;
2829 *rLo = tLo;
2830 return;
2831 }
2832
2833 default:
2834 break;
2835 }
2836 } /* if (e->tag == Iex_Unop) */
2837
2838
2839 /* --------- CCALL --------- */
2840 if (e->tag == Iex_CCall) {
2841 HReg tLo = newVRegI(env);
2842 HReg tHi = newVRegI(env);
2843
2844 /* Marshal args, do the call, clear stack. */
2845 UInt addToSp = 0;
2846 RetLoc rloc = mk_RetLoc_INVALID();
2847 doHelperCall( &addToSp, &rloc, env, NULL/*guard*/,
2848 e->Iex.CCall.cee,
2849 e->Iex.CCall.retty, e->Iex.CCall.args );
2850 vassert(is_sane_RetLoc(rloc));
2851 vassert(rloc.pri == RLPri_2Int);
2852 vassert(addToSp == 0);
2853 /* */
2854
2855 addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi));
2856 addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo));
2857 *rHi = tHi;
2858 *rLo = tLo;
2859 return;
2860 }
2861
2862 ppIRExpr(e);
2863 vpanic("iselInt64Expr");
2864 }
2865
2866
2867 /*---------------------------------------------------------*/
2868 /*--- ISEL: Floating point expressions (32 bit) ---*/
2869 /*---------------------------------------------------------*/
2870
2871 /* Nothing interesting here; really just wrappers for
2872 64-bit stuff. */
2873
2874 static HReg iselFltExpr ( ISelEnv* env, IRExpr* e )
2875 {
2876 HReg r = iselFltExpr_wrk( env, e );
2877 # if 0
2878 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
2879 # endif
2880 vassert(hregClass(r) == HRcFlt64); /* yes, really Flt64 */
2881 vassert(hregIsVirtual(r));
2882 return r;
2883 }
2884
2885 /* DO NOT CALL THIS DIRECTLY */
2886 static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e )
2887 {
2888 IRType ty = typeOfIRExpr(env->type_env,e);
2889 vassert(ty == Ity_F32);
2890
2891 if (e->tag == Iex_RdTmp) {
2892 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
2893 }
2894
2895 if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
2896 X86AMode* am;
2897 HReg res = newVRegF(env);
2898 vassert(e->Iex.Load.ty == Ity_F32);
2899 am = iselIntExpr_AMode(env, e->Iex.Load.addr);
2900 addInstr(env, X86Instr_FpLdSt(True/*load*/, 4, res, am));
2901 return res;
2902 }
2903
2904 if (e->tag == Iex_Binop
2905 && e->Iex.Binop.op == Iop_F64toF32) {
2906 /* Although the result is still held in a standard FPU register,
2907 we need to round it to reflect the loss of accuracy/range
2908 entailed in casting it to a 32-bit float. */
2909 HReg dst = newVRegF(env);
2910 HReg src = iselDblExpr(env, e->Iex.Binop.arg2);
2911 set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
2912 addInstr(env, X86Instr_Fp64to32(src,dst));
2913 set_FPU_rounding_default( env );
2914 return dst;
2915 }
2916
2917 if (e->tag == Iex_Get) {
2918 X86AMode* am = X86AMode_IR( e->Iex.Get.offset,
2919 hregX86_EBP() );
2920 HReg res = newVRegF(env);
2921 addInstr(env, X86Instr_FpLdSt( True/*load*/, 4, res, am ));
2922 return res;
2923 }
2924
2925 if (e->tag == Iex_Unop
2926 && e->Iex.Unop.op == Iop_ReinterpI32asF32) {
2927 /* Given an I32, produce an IEEE754 float with the same bit
2928 pattern. */
2929 HReg dst = newVRegF(env);
2930 X86RMI* rmi = iselIntExpr_RMI(env, e->Iex.Unop.arg);
2931 /* paranoia */
2932 addInstr(env, X86Instr_Push(rmi));
2933 addInstr(env, X86Instr_FpLdSt(
2934 True/*load*/, 4, dst,
2935 X86AMode_IR(0, hregX86_ESP())));
2936 add_to_esp(env, 4);
2937 return dst;
2938 }
2939
2940 if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_RoundF32toInt) {
2941 HReg rf = iselFltExpr(env, e->Iex.Binop.arg2);
2942 HReg dst = newVRegF(env);
2943
2944 /* rf now holds the value to be rounded. The first thing to do
2945 is set the FPU's rounding mode accordingly. */
2946
2947 /* Set host rounding mode */
2948 set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
2949
2950 /* grndint %rf, %dst */
2951 addInstr(env, X86Instr_FpUnary(Xfp_ROUND, rf, dst));
2952
2953 /* Restore default FPU rounding. */
2954 set_FPU_rounding_default( env );
2955
2956 return dst;
2957 }
2958
2959 ppIRExpr(e);
2960 vpanic("iselFltExpr_wrk");
2961 }
2962
2963
2964 /*---------------------------------------------------------*/
2965 /*--- ISEL: Floating point expressions (64 bit) ---*/
2966 /*---------------------------------------------------------*/
2967
2968 /* Compute a 64-bit floating point value into a register, the identity
2969 of which is returned. As with iselIntExpr_R, the reg may be either
2970 real or virtual; in any case it must not be changed by subsequent
2971 code emitted by the caller. */
2972
2973 /* IEEE 754 formats. From http://www.freesoft.org/CIE/RFC/1832/32.htm:
2974
2975 Type S (1 bit) E (11 bits) F (52 bits)
2976 ---- --------- ----------- -----------
2977 signalling NaN u 2047 (max) .0uuuuu---u
2978 (with at least
2979 one 1 bit)
2980 quiet NaN u 2047 (max) .1uuuuu---u
2981
2982 negative infinity 1 2047 (max) .000000---0
2983
2984 positive infinity 0 2047 (max) .000000---0
2985
2986 negative zero 1 0 .000000---0
2987
2988 positive zero 0 0 .000000---0
2989 */
2990
2991 static HReg iselDblExpr ( ISelEnv* env, IRExpr* e )
2992 {
2993 HReg r = iselDblExpr_wrk( env, e );
2994 # if 0
2995 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
2996 # endif
2997 vassert(hregClass(r) == HRcFlt64);
2998 vassert(hregIsVirtual(r));
2999 return r;
3000 }
3001
3002 /* DO NOT CALL THIS DIRECTLY */
3003 static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e )
3004 {
3005 IRType ty = typeOfIRExpr(env->type_env,e);
3006 vassert(e);
3007 vassert(ty == Ity_F64);
3008
3009 if (e->tag == Iex_RdTmp) {
3010 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
3011 }
3012
3013 if (e->tag == Iex_Const) {
3014 union { UInt u32x2[2]; ULong u64; Double f64; } u;
3015 HReg freg = newVRegF(env);
3016 vassert(sizeof(u) == 8);
3017 vassert(sizeof(u.u64) == 8);
3018 vassert(sizeof(u.f64) == 8);
3019 vassert(sizeof(u.u32x2) == 8);
3020
3021 if (e->Iex.Const.con->tag == Ico_F64) {
3022 u.f64 = e->Iex.Const.con->Ico.F64;
3023 }
3024 else if (e->Iex.Const.con->tag == Ico_F64i) {
3025 u.u64 = e->Iex.Const.con->Ico.F64i;
3026 }
3027 else
3028 vpanic("iselDblExpr(x86): const");
3029
3030 addInstr(env, X86Instr_Push(X86RMI_Imm(u.u32x2[1])));
3031 addInstr(env, X86Instr_Push(X86RMI_Imm(u.u32x2[0])));
3032 addInstr(env, X86Instr_FpLdSt(True/*load*/, 8, freg,
3033 X86AMode_IR(0, hregX86_ESP())));
3034 add_to_esp(env, 8);
3035 return freg;
3036 }
3037
3038 if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
3039 X86AMode* am;
3040 HReg res = newVRegF(env);
3041 vassert(e->Iex.Load.ty == Ity_F64);
3042 am = iselIntExpr_AMode(env, e->Iex.Load.addr);
3043 addInstr(env, X86Instr_FpLdSt(True/*load*/, 8, res, am));
3044 return res;
3045 }
3046
3047 if (e->tag == Iex_Get) {
3048 X86AMode* am = X86AMode_IR( e->Iex.Get.offset,
3049 hregX86_EBP() );
3050 HReg res = newVRegF(env);
3051 addInstr(env, X86Instr_FpLdSt( True/*load*/, 8, res, am ));
3052 return res;
3053 }
3054
3055 if (e->tag == Iex_GetI) {
3056 X86AMode* am
3057 = genGuestArrayOffset(
3058 env, e->Iex.GetI.descr,
3059 e->Iex.GetI.ix, e->Iex.GetI.bias );
3060 HReg res = newVRegF(env);
3061 addInstr(env, X86Instr_FpLdSt( True/*load*/, 8, res, am ));
3062 return res;
3063 }
3064
3065 if (e->tag == Iex_Triop) {
3066 X86FpOp fpop = Xfp_INVALID;
3067 IRTriop *triop = e->Iex.Triop.details;
3068 switch (triop->op) {
3069 case Iop_AddF64: fpop = Xfp_ADD; break;
3070 case Iop_SubF64: fpop = Xfp_SUB; break;
3071 case Iop_MulF64: fpop = Xfp_MUL; break;
3072 case Iop_DivF64: fpop = Xfp_DIV; break;
3073 case Iop_ScaleF64: fpop = Xfp_SCALE; break;
3074 case Iop_Yl2xF64: fpop = Xfp_YL2X; break;
3075 case Iop_Yl2xp1F64: fpop = Xfp_YL2XP1; break;
3076 case Iop_AtanF64: fpop = Xfp_ATAN; break;
3077 case Iop_PRemF64: fpop = Xfp_PREM; break;
3078 case Iop_PRem1F64: fpop = Xfp_PREM1; break;
3079 default: break;
3080 }
3081 if (fpop != Xfp_INVALID) {
3082 HReg res = newVRegF(env);
3083 HReg srcL = iselDblExpr(env, triop->arg2);
3084 HReg srcR = iselDblExpr(env, triop->arg3);
3085 /* XXXROUNDINGFIXME */
3086 /* set roundingmode here */
3087 addInstr(env, X86Instr_FpBinary(fpop,srcL,srcR,res));
3088 if (fpop != Xfp_ADD && fpop != Xfp_SUB
3089 && fpop != Xfp_MUL && fpop != Xfp_DIV)
3090 roundToF64(env, res);
3091 return res;
3092 }
3093 }
3094
3095 if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_RoundF64toInt) {
3096 HReg rf = iselDblExpr(env, e->Iex.Binop.arg2);
3097 HReg dst = newVRegF(env);
3098
3099 /* rf now holds the value to be rounded. The first thing to do
3100 is set the FPU's rounding mode accordingly. */
3101
3102 /* Set host rounding mode */
3103 set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
3104
3105 /* grndint %rf, %dst */
3106 addInstr(env, X86Instr_FpUnary(Xfp_ROUND, rf, dst));
3107
3108 /* Restore default FPU rounding. */
3109 set_FPU_rounding_default( env );
3110
3111 return dst;
3112 }
3113
3114 if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_I64StoF64) {
3115 HReg dst = newVRegF(env);
3116 HReg rHi,rLo;
3117 iselInt64Expr( &rHi, &rLo, env, e->Iex.Binop.arg2);
3118 addInstr(env, X86Instr_Push(X86RMI_Reg(rHi)));
3119 addInstr(env, X86Instr_Push(X86RMI_Reg(rLo)));
3120
3121 /* Set host rounding mode */
3122 set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
3123
3124 addInstr(env, X86Instr_FpLdStI(
3125 True/*load*/, 8, dst,
3126 X86AMode_IR(0, hregX86_ESP())));
3127
3128 /* Restore default FPU rounding. */
3129 set_FPU_rounding_default( env );
3130
3131 add_to_esp(env, 8);
3132 return dst;
3133 }
3134
3135 if (e->tag == Iex_Binop) {
3136 X86FpOp fpop = Xfp_INVALID;
3137 switch (e->Iex.Binop.op) {
3138 case Iop_SinF64: fpop = Xfp_SIN; break;
3139 case Iop_CosF64: fpop = Xfp_COS; break;
3140 case Iop_TanF64: fpop = Xfp_TAN; break;
3141 case Iop_2xm1F64: fpop = Xfp_2XM1; break;
3142 case Iop_SqrtF64: fpop = Xfp_SQRT; break;
3143 default: break;
3144 }
3145 if (fpop != Xfp_INVALID) {
3146 HReg res = newVRegF(env);
3147 HReg src = iselDblExpr(env, e->Iex.Binop.arg2);
3148 /* XXXROUNDINGFIXME */
3149 /* set roundingmode here */
3150 /* Note that X86Instr_FpUnary(Xfp_TAN,..) sets the condition
3151 codes. I don't think that matters, since this insn
3152 selector never generates such an instruction intervening
3153 between an flag-setting instruction and a flag-using
3154 instruction. */
3155 addInstr(env, X86Instr_FpUnary(fpop,src,res));
3156 if (fpop != Xfp_SQRT
3157 && fpop != Xfp_NEG && fpop != Xfp_ABS)
3158 roundToF64(env, res);
3159 return res;
3160 }
3161 }
3162
3163 if (e->tag == Iex_Unop) {
3164 X86FpOp fpop = Xfp_INVALID;
3165 switch (e->Iex.Unop.op) {
3166 case Iop_NegF64: fpop = Xfp_NEG; break;
3167 case Iop_AbsF64: fpop = Xfp_ABS; break;
3168 default: break;
3169 }
3170 if (fpop != Xfp_INVALID) {
3171 HReg res = newVRegF(env);
3172 HReg src = iselDblExpr(env, e->Iex.Unop.arg);
3173 addInstr(env, X86Instr_FpUnary(fpop,src,res));
3174 /* No need to do roundToF64(env,res) for Xfp_NEG or Xfp_ABS,
3175 but might need to do that for other unary ops. */
3176 return res;
3177 }
3178 }
3179
3180 if (e->tag == Iex_Unop) {
3181 switch (e->Iex.Unop.op) {
3182 case Iop_I32StoF64: {
3183 HReg dst = newVRegF(env);
3184 HReg ri = iselIntExpr_R(env, e->Iex.Unop.arg);
3185 addInstr(env, X86Instr_Push(X86RMI_Reg(ri)));
3186 set_FPU_rounding_default(env);
3187 addInstr(env, X86Instr_FpLdStI(
3188 True/*load*/, 4, dst,
3189 X86AMode_IR(0, hregX86_ESP())));
3190 add_to_esp(env, 4);
3191 return dst;
3192 }
3193 case Iop_ReinterpI64asF64: {
3194 /* Given an I64, produce an IEEE754 double with the same
3195 bit pattern. */
3196 HReg dst = newVRegF(env);
3197 HReg rHi, rLo;
3198 iselInt64Expr( &rHi, &rLo, env, e->Iex.Unop.arg);
3199 /* paranoia */
3200 set_FPU_rounding_default(env);
3201 addInstr(env, X86Instr_Push(X86RMI_Reg(rHi)));
3202 addInstr(env, X86Instr_Push(X86RMI_Reg(rLo)));
3203 addInstr(env, X86Instr_FpLdSt(
3204 True/*load*/, 8, dst,
3205 X86AMode_IR(0, hregX86_ESP())));
3206 add_to_esp(env, 8);
3207 return dst;
3208 }
3209 case Iop_F32toF64: {
3210 /* this is a no-op */
3211 HReg res = iselFltExpr(env, e->Iex.Unop.arg);
3212 return res;
3213 }
3214 default:
3215 break;
3216 }
3217 }
3218
3219 /* --------- MULTIPLEX --------- */
3220 if (e->tag == Iex_ITE) { // VFD
3221 if (ty == Ity_F64
3222 && typeOfIRExpr(env->type_env,e->Iex.ITE.cond) == Ity_I1) {
3223 HReg r1 = iselDblExpr(env, e->Iex.ITE.iftrue);
3224 HReg r0 = iselDblExpr(env, e->Iex.ITE.iffalse);
3225 HReg dst = newVRegF(env);
3226 addInstr(env, X86Instr_FpUnary(Xfp_MOV,r1,dst));
3227 X86CondCode cc = iselCondCode(env, e->Iex.ITE.cond);
3228 addInstr(env, X86Instr_FpCMov(cc ^ 1, r0, dst));
3229 return dst;
3230 }
3231 }
3232
3233 ppIRExpr(e);
3234 vpanic("iselDblExpr_wrk");
3235 }
3236
3237
3238 /*---------------------------------------------------------*/
3239 /*--- ISEL: SIMD (Vector) expressions, 128 bit. ---*/
3240 /*---------------------------------------------------------*/
3241
3242 static HReg iselVecExpr ( ISelEnv* env, IRExpr* e )
3243 {
3244 HReg r = iselVecExpr_wrk( env, e );
3245 # if 0
3246 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
3247 # endif
3248 vassert(hregClass(r) == HRcVec128);
3249 vassert(hregIsVirtual(r));
3250 return r;
3251 }
3252
3253
3254 /* DO NOT CALL THIS DIRECTLY */
3255 static HReg iselVecExpr_wrk ( ISelEnv* env, IRExpr* e )
3256 {
3257
3258 # define REQUIRE_SSE1 \
3259 do { if (env->hwcaps == 0/*baseline, no sse*/ \
3260 || env->hwcaps == VEX_HWCAPS_X86_MMXEXT /*Integer SSE*/) \
3261 goto vec_fail; \
3262 } while (0)
3263
3264 # define REQUIRE_SSE2 \
3265 do { if (0 == (env->hwcaps & VEX_HWCAPS_X86_SSE2)) \
3266 goto vec_fail; \
3267 } while (0)
3268
3269 # define SSE2_OR_ABOVE \
3270 (env->hwcaps & VEX_HWCAPS_X86_SSE2)
3271
3272 HWord fn = 0; /* address of helper fn, if required */
3273 MatchInfo mi;
3274 Bool arg1isEReg = False;
3275 X86SseOp op = Xsse_INVALID;
3276 IRType ty = typeOfIRExpr(env->type_env,e);
3277 vassert(e);
3278 vassert(ty == Ity_V128);
3279
3280 REQUIRE_SSE1;
3281
3282 if (e->tag == Iex_RdTmp) {
3283 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
3284 }
3285
3286 if (e->tag == Iex_Get) {
3287 HReg dst = newVRegV(env);
3288 addInstr(env, X86Instr_SseLdSt(
3289 True/*load*/,
3290 dst,
3291 X86AMode_IR(e->Iex.Get.offset, hregX86_EBP())
3292 )
3293 );
3294 return dst;
3295 }
3296
3297 if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
3298 HReg dst = newVRegV(env);
3299 X86AMode* am = iselIntExpr_AMode(env, e->Iex.Load.addr);
3300 addInstr(env, X86Instr_SseLdSt( True/*load*/, dst, am ));
3301 return dst;
3302 }
3303
3304 if (e->tag == Iex_Const) {
3305 HReg dst = newVRegV(env);
3306 vassert(e->Iex.Const.con->tag == Ico_V128);
3307 addInstr(env, X86Instr_SseConst(e->Iex.Const.con->Ico.V128, dst));
3308 return dst;
3309 }
3310
3311 if (e->tag == Iex_Unop) {
3312
3313 if (SSE2_OR_ABOVE) {
3314 /* 64UtoV128(LDle:I64(addr)) */
3315 DECLARE_PATTERN(p_zwiden_load64);
3316 DEFINE_PATTERN(p_zwiden_load64,
3317 unop(Iop_64UtoV128,
3318 IRExpr_Load(Iend_LE,Ity_I64,bind(0))));
3319 if (matchIRExpr(&mi, p_zwiden_load64, e)) {
3320 X86AMode* am = iselIntExpr_AMode(env, mi.bindee[0]);
3321 HReg dst = newVRegV(env);
3322 addInstr(env, X86Instr_SseLdzLO(8, dst, am));
3323 return dst;
3324 }
3325 }
3326
3327 switch (e->Iex.Unop.op) {
3328
3329 case Iop_NotV128: {
3330 HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
3331 return do_sse_Not128(env, arg);
3332 }
3333
3334 case Iop_CmpNEZ64x2: {
3335 /* We can use SSE2 instructions for this. */
3336 /* Ideally, we want to do a 64Ix2 comparison against zero of
3337 the operand. Problem is no such insn exists. Solution
3338 therefore is to do a 32Ix4 comparison instead, and bitwise-
3339 negate (NOT) the result. Let a,b,c,d be 32-bit lanes, and
3340 let the not'd result of this initial comparison be a:b:c:d.
3341 What we need to compute is (a|b):(a|b):(c|d):(c|d). So, use
3342 pshufd to create a value b:a:d:c, and OR that with a:b:c:d,
3343 giving the required result.
3344
3345 The required selection sequence is 2,3,0,1, which
3346 according to Intel's documentation means the pshufd
3347 literal value is 0xB1, that is,
3348 (2 << 6) | (3 << 4) | (0 << 2) | (1 << 0)
3349 */
3350 HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
3351 HReg tmp = newVRegV(env);
3352 HReg dst = newVRegV(env);
3353 REQUIRE_SSE2;
3354 addInstr(env, X86Instr_SseReRg(Xsse_XOR, tmp, tmp));
3355 addInstr(env, X86Instr_SseReRg(Xsse_CMPEQ32, arg, tmp));
3356 tmp = do_sse_Not128(env, tmp);
3357 addInstr(env, X86Instr_SseShuf(0xB1, tmp, dst));
3358 addInstr(env, X86Instr_SseReRg(Xsse_OR, tmp, dst));
3359 return dst;
3360 }
3361
3362 case Iop_CmpNEZ32x4: {
3363 /* Sigh, we have to generate lousy code since this has to
3364 work on SSE1 hosts */
3365 /* basically, the idea is: for each lane:
3366 movl lane, %r ; negl %r (now CF = lane==0 ? 0 : 1)
3367 sbbl %r, %r (now %r = 1Sto32(CF))
3368 movl %r, lane
3369 */
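            /* E.g. lane == 5: negl sets CF=1, and sbbl %r,%r computes
               r - r - CF = -1 = 0xFFFFFFFF; lane == 0: CF=0, giving 0. */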
3370 Int i;
3371 X86AMode* am;
3372 X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
3373 HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
3374 HReg dst = newVRegV(env);
3375 HReg r32 = newVRegI(env);
3376 sub_from_esp(env, 16);
3377 addInstr(env, X86Instr_SseLdSt(False/*store*/, arg, esp0));
3378 for (i = 0; i < 4; i++) {
3379 am = X86AMode_IR(i*4, hregX86_ESP());
3380 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(am), r32));
3381 addInstr(env, X86Instr_Unary32(Xun_NEG, r32));
3382 addInstr(env, X86Instr_Alu32R(Xalu_SBB, X86RMI_Reg(r32), r32));
3383 addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(r32), am));
3384 }
3385 addInstr(env, X86Instr_SseLdSt(True/*load*/, dst, esp0));
3386 add_to_esp(env, 16);
3387 return dst;
3388 }
3389
3390 case Iop_CmpNEZ8x16:
3391 case Iop_CmpNEZ16x8: {
3392 /* We can use SSE2 instructions for this. */
3393 HReg arg;
3394 HReg vec0 = newVRegV(env);
3395 HReg vec1 = newVRegV(env);
3396 HReg dst = newVRegV(env);
3397 X86SseOp cmpOp
3398 = e->Iex.Unop.op==Iop_CmpNEZ16x8 ? Xsse_CMPEQ16
3399 : Xsse_CMPEQ8;
3400 REQUIRE_SSE2;
3401 addInstr(env, X86Instr_SseReRg(Xsse_XOR, vec0, vec0));
3402 addInstr(env, mk_vMOVsd_RR(vec0, vec1));
3403 addInstr(env, X86Instr_Sse32Fx4(Xsse_CMPEQF, vec1, vec1));
3404 /* defer arg computation to here so as to give CMPEQF as long
3405 as possible to complete */
3406 arg = iselVecExpr(env, e->Iex.Unop.arg);
3407 /* vec0 is all 0s; vec1 is all 1s */
3408 addInstr(env, mk_vMOVsd_RR(arg, dst));
3409 /* 16x8 or 8x16 comparison == */
3410 addInstr(env, X86Instr_SseReRg(cmpOp, vec0, dst));
3411 /* invert result */
3412 addInstr(env, X86Instr_SseReRg(Xsse_XOR, vec1, dst));
3413 return dst;
3414 }
3415
3416 case Iop_RecipEst32Fx4: op = Xsse_RCPF; goto do_32Fx4_unary;
3417 case Iop_RSqrtEst32Fx4: op = Xsse_RSQRTF; goto do_32Fx4_unary;
3418 do_32Fx4_unary:
3419 {
3420 HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
3421 HReg dst = newVRegV(env);
3422 addInstr(env, X86Instr_Sse32Fx4(op, arg, dst));
3423 return dst;
3424 }
3425
3426 case Iop_RecipEst32F0x4: op = Xsse_RCPF; goto do_32F0x4_unary;
3427 case Iop_RSqrtEst32F0x4: op = Xsse_RSQRTF; goto do_32F0x4_unary;
3428 case Iop_Sqrt32F0x4: op = Xsse_SQRTF; goto do_32F0x4_unary;
3429 do_32F0x4_unary:
3430 {
3431 /* A bit subtle. We have to copy the arg to the result
3432 register first, because actually doing the SSE scalar insn
3433 leaves the upper 3/4 of the destination register
3434 unchanged. Whereas the required semantics of these
3435 primops is that the upper 3/4 is simply copied in from the
3436 argument. */
3437 HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
3438 HReg dst = newVRegV(env);
3439 addInstr(env, mk_vMOVsd_RR(arg, dst));
3440 addInstr(env, X86Instr_Sse32FLo(op, arg, dst));
3441 return dst;
3442 }
3443
3444 case Iop_Sqrt64F0x2: op = Xsse_SQRTF; goto do_64F0x2_unary;
3445 do_64F0x2_unary:
3446 {
3447 /* A bit subtle. We have to copy the arg to the result
3448 register first, because actually doing the SSE scalar insn
3449 leaves the upper half of the destination register
3450 unchanged. Whereas the required semantics of these
3451 primops is that the upper half is simply copied in from the
3452 argument. */
3453 HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
3454 HReg dst = newVRegV(env);
3455 REQUIRE_SSE2;
3456 addInstr(env, mk_vMOVsd_RR(arg, dst));
3457 addInstr(env, X86Instr_Sse64FLo(op, arg, dst));
3458 return dst;
3459 }
3460
3461 case Iop_32UtoV128: {
3462 HReg dst = newVRegV(env);
3463 X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
3464 X86RMI* rmi = iselIntExpr_RMI(env, e->Iex.Unop.arg);
3465 addInstr(env, X86Instr_Push(rmi));
3466 addInstr(env, X86Instr_SseLdzLO(4, dst, esp0));
3467 add_to_esp(env, 4);
3468 return dst;
3469 }
3470
3471 case Iop_64UtoV128: {
3472 HReg rHi, rLo;
3473 HReg dst = newVRegV(env);
3474 X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
3475 iselInt64Expr(&rHi, &rLo, env, e->Iex.Unop.arg);
3476 addInstr(env, X86Instr_Push(X86RMI_Reg(rHi)));
3477 addInstr(env, X86Instr_Push(X86RMI_Reg(rLo)));
3478 addInstr(env, X86Instr_SseLdzLO(8, dst, esp0));
3479 add_to_esp(env, 8);
3480 return dst;
3481 }
3482
3483 default:
3484 break;
3485 } /* switch (e->Iex.Unop.op) */
3486 } /* if (e->tag == Iex_Unop) */
3487
3488 if (e->tag == Iex_Binop) {
3489 switch (e->Iex.Binop.op) {
3490
3491 case Iop_Sqrt64Fx2:
3492 REQUIRE_SSE2;
3493 /* fallthrough */
3494 case Iop_Sqrt32Fx4: {
3495 /* :: (rmode, vec) -> vec */
3496 HReg arg = iselVecExpr(env, e->Iex.Binop.arg2);
3497 HReg dst = newVRegV(env);
3498 /* XXXROUNDINGFIXME */
3499 /* set roundingmode here */
3500 addInstr(env, (e->Iex.Binop.op == Iop_Sqrt64Fx2
3501 ? X86Instr_Sse64Fx2 : X86Instr_Sse32Fx4)
3502 (Xsse_SQRTF, arg, dst));
3503 return dst;
3504 }
3505
3506 case Iop_SetV128lo32: {
3507 HReg dst = newVRegV(env);
3508 HReg srcV = iselVecExpr(env, e->Iex.Binop.arg1);
3509 HReg srcI = iselIntExpr_R(env, e->Iex.Binop.arg2);
3510 X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
3511 sub_from_esp(env, 16);
3512 addInstr(env, X86Instr_SseLdSt(False/*store*/, srcV, esp0));
3513 addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(srcI), esp0));
3514 addInstr(env, X86Instr_SseLdSt(True/*load*/, dst, esp0));
3515 add_to_esp(env, 16);
3516 return dst;
3517 }
3518
3519 case Iop_SetV128lo64: {
3520 HReg dst = newVRegV(env);
3521 HReg srcV = iselVecExpr(env, e->Iex.Binop.arg1);
3522 HReg srcIhi, srcIlo;
3523 X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
3524 X86AMode* esp4 = advance4(esp0);
3525 iselInt64Expr(&srcIhi, &srcIlo, env, e->Iex.Binop.arg2);
3526 sub_from_esp(env, 16);
3527 addInstr(env, X86Instr_SseLdSt(False/*store*/, srcV, esp0));
3528 addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(srcIlo), esp0));
3529 addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(srcIhi), esp4));
3530 addInstr(env, X86Instr_SseLdSt(True/*load*/, dst, esp0));
3531 add_to_esp(env, 16);
3532 return dst;
3533 }
3534
3535 case Iop_64HLtoV128: {
3536 HReg r3, r2, r1, r0;
3537 X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
3538 X86AMode* esp4 = advance4(esp0);
3539 X86AMode* esp8 = advance4(esp4);
3540 X86AMode* esp12 = advance4(esp8);
3541 HReg dst = newVRegV(env);
3542 /* do this via the stack (easy, convenient, etc) */
3543 sub_from_esp(env, 16);
3544 /* Do the less significant 64 bits */
3545 iselInt64Expr(&r1, &r0, env, e->Iex.Binop.arg2);
3546 addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(r0), esp0));
3547 addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(r1), esp4));
3548 /* Do the more significant 64 bits */
3549 iselInt64Expr(&r3, &r2, env, e->Iex.Binop.arg1);
3550 addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(r2), esp8));
3551 addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(r3), esp12));
3552 /* Fetch result back from stack. */
3553 addInstr(env, X86Instr_SseLdSt(True/*load*/, dst, esp0));
3554 add_to_esp(env, 16);
3555 return dst;
3556 }
3557
3558 case Iop_CmpEQ32Fx4: op = Xsse_CMPEQF; goto do_32Fx4;
3559 case Iop_CmpLT32Fx4: op = Xsse_CMPLTF; goto do_32Fx4;
3560 case Iop_CmpLE32Fx4: op = Xsse_CMPLEF; goto do_32Fx4;
3561 case Iop_CmpUN32Fx4: op = Xsse_CMPUNF; goto do_32Fx4;
3562 case Iop_Max32Fx4: op = Xsse_MAXF; goto do_32Fx4;
3563 case Iop_Min32Fx4: op = Xsse_MINF; goto do_32Fx4;
3564 do_32Fx4:
3565 {
3566 HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
3567 HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
3568 HReg dst = newVRegV(env);
3569 addInstr(env, mk_vMOVsd_RR(argL, dst));
3570 addInstr(env, X86Instr_Sse32Fx4(op, argR, dst));
3571 return dst;
3572 }
3573
3574 case Iop_CmpEQ64Fx2: op = Xsse_CMPEQF; goto do_64Fx2;
3575 case Iop_CmpLT64Fx2: op = Xsse_CMPLTF; goto do_64Fx2;
3576 case Iop_CmpLE64Fx2: op = Xsse_CMPLEF; goto do_64Fx2;
3577 case Iop_CmpUN64Fx2: op = Xsse_CMPUNF; goto do_64Fx2;
3578 case Iop_Max64Fx2: op = Xsse_MAXF; goto do_64Fx2;
3579 case Iop_Min64Fx2: op = Xsse_MINF; goto do_64Fx2;
3580 do_64Fx2:
3581 {
3582 HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
3583 HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
3584 HReg dst = newVRegV(env);
3585 REQUIRE_SSE2;
3586 addInstr(env, mk_vMOVsd_RR(argL, dst));
3587 addInstr(env, X86Instr_Sse64Fx2(op, argR, dst));
3588 return dst;
3589 }
3590
3591 case Iop_CmpEQ32F0x4: op = Xsse_CMPEQF; goto do_32F0x4;
3592 case Iop_CmpLT32F0x4: op = Xsse_CMPLTF; goto do_32F0x4;
3593 case Iop_CmpLE32F0x4: op = Xsse_CMPLEF; goto do_32F0x4;
3594 case Iop_CmpUN32F0x4: op = Xsse_CMPUNF; goto do_32F0x4;
3595 case Iop_Add32F0x4: op = Xsse_ADDF; goto do_32F0x4;
3596 case Iop_Div32F0x4: op = Xsse_DIVF; goto do_32F0x4;
3597 case Iop_Max32F0x4: op = Xsse_MAXF; goto do_32F0x4;
3598 case Iop_Min32F0x4: op = Xsse_MINF; goto do_32F0x4;
3599 case Iop_Mul32F0x4: op = Xsse_MULF; goto do_32F0x4;
3600 case Iop_Sub32F0x4: op = Xsse_SUBF; goto do_32F0x4;
3601 do_32F0x4: {
3602 HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
3603 HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
3604 HReg dst = newVRegV(env);
3605 addInstr(env, mk_vMOVsd_RR(argL, dst));
3606 addInstr(env, X86Instr_Sse32FLo(op, argR, dst));
3607 return dst;
3608 }
3609
3610 case Iop_CmpEQ64F0x2: op = Xsse_CMPEQF; goto do_64F0x2;
3611 case Iop_CmpLT64F0x2: op = Xsse_CMPLTF; goto do_64F0x2;
3612 case Iop_CmpLE64F0x2: op = Xsse_CMPLEF; goto do_64F0x2;
3613 case Iop_CmpUN64F0x2: op = Xsse_CMPUNF; goto do_64F0x2;
3614 case Iop_Add64F0x2: op = Xsse_ADDF; goto do_64F0x2;
3615 case Iop_Div64F0x2: op = Xsse_DIVF; goto do_64F0x2;
3616 case Iop_Max64F0x2: op = Xsse_MAXF; goto do_64F0x2;
3617 case Iop_Min64F0x2: op = Xsse_MINF; goto do_64F0x2;
3618 case Iop_Mul64F0x2: op = Xsse_MULF; goto do_64F0x2;
3619 case Iop_Sub64F0x2: op = Xsse_SUBF; goto do_64F0x2;
3620 do_64F0x2: {
3621 HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
3622 HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
3623 HReg dst = newVRegV(env);
3624 REQUIRE_SSE2;
3625 addInstr(env, mk_vMOVsd_RR(argL, dst));
3626 addInstr(env, X86Instr_Sse64FLo(op, argR, dst));
3627 return dst;
3628 }
3629
3630 case Iop_QNarrowBin32Sto16Sx8:
3631 op = Xsse_PACKSSD; arg1isEReg = True; goto do_SseReRg;
3632 case Iop_QNarrowBin16Sto8Sx16:
3633 op = Xsse_PACKSSW; arg1isEReg = True; goto do_SseReRg;
3634 case Iop_QNarrowBin16Sto8Ux16:
3635 op = Xsse_PACKUSW; arg1isEReg = True; goto do_SseReRg;
3636
3637 case Iop_InterleaveHI8x16:
3638 op = Xsse_UNPCKHB; arg1isEReg = True; goto do_SseReRg;
3639 case Iop_InterleaveHI16x8:
3640 op = Xsse_UNPCKHW; arg1isEReg = True; goto do_SseReRg;
3641 case Iop_InterleaveHI32x4:
3642 op = Xsse_UNPCKHD; arg1isEReg = True; goto do_SseReRg;
3643 case Iop_InterleaveHI64x2:
3644 op = Xsse_UNPCKHQ; arg1isEReg = True; goto do_SseReRg;
3645
3646 case Iop_InterleaveLO8x16:
3647 op = Xsse_UNPCKLB; arg1isEReg = True; goto do_SseReRg;
3648 case Iop_InterleaveLO16x8:
3649 op = Xsse_UNPCKLW; arg1isEReg = True; goto do_SseReRg;
3650 case Iop_InterleaveLO32x4:
3651 op = Xsse_UNPCKLD; arg1isEReg = True; goto do_SseReRg;
3652 case Iop_InterleaveLO64x2:
3653 op = Xsse_UNPCKLQ; arg1isEReg = True; goto do_SseReRg;
3654
3655 case Iop_AndV128: op = Xsse_AND; goto do_SseReRg;
3656 case Iop_OrV128: op = Xsse_OR; goto do_SseReRg;
3657 case Iop_XorV128: op = Xsse_XOR; goto do_SseReRg;
3658 case Iop_Add8x16: op = Xsse_ADD8; goto do_SseReRg;
3659 case Iop_Add16x8: op = Xsse_ADD16; goto do_SseReRg;
3660 case Iop_Add32x4: op = Xsse_ADD32; goto do_SseReRg;
3661 case Iop_Add64x2: op = Xsse_ADD64; goto do_SseReRg;
3662 case Iop_QAdd8Sx16: op = Xsse_QADD8S; goto do_SseReRg;
3663 case Iop_QAdd16Sx8: op = Xsse_QADD16S; goto do_SseReRg;
3664 case Iop_QAdd8Ux16: op = Xsse_QADD8U; goto do_SseReRg;
3665 case Iop_QAdd16Ux8: op = Xsse_QADD16U; goto do_SseReRg;
3666 case Iop_Avg8Ux16: op = Xsse_AVG8U; goto do_SseReRg;
3667 case Iop_Avg16Ux8: op = Xsse_AVG16U; goto do_SseReRg;
3668 case Iop_CmpEQ8x16: op = Xsse_CMPEQ8; goto do_SseReRg;
3669 case Iop_CmpEQ16x8: op = Xsse_CMPEQ16; goto do_SseReRg;
3670 case Iop_CmpEQ32x4: op = Xsse_CMPEQ32; goto do_SseReRg;
3671 case Iop_CmpGT8Sx16: op = Xsse_CMPGT8S; goto do_SseReRg;
3672 case Iop_CmpGT16Sx8: op = Xsse_CMPGT16S; goto do_SseReRg;
3673 case Iop_CmpGT32Sx4: op = Xsse_CMPGT32S; goto do_SseReRg;
3674 case Iop_Max16Sx8: op = Xsse_MAX16S; goto do_SseReRg;
3675 case Iop_Max8Ux16: op = Xsse_MAX8U; goto do_SseReRg;
3676 case Iop_Min16Sx8: op = Xsse_MIN16S; goto do_SseReRg;
3677 case Iop_Min8Ux16: op = Xsse_MIN8U; goto do_SseReRg;
3678 case Iop_MulHi16Ux8: op = Xsse_MULHI16U; goto do_SseReRg;
3679 case Iop_MulHi16Sx8: op = Xsse_MULHI16S; goto do_SseReRg;
3680 case Iop_Mul16x8: op = Xsse_MUL16; goto do_SseReRg;
3681 case Iop_Sub8x16: op = Xsse_SUB8; goto do_SseReRg;
3682 case Iop_Sub16x8: op = Xsse_SUB16; goto do_SseReRg;
3683 case Iop_Sub32x4: op = Xsse_SUB32; goto do_SseReRg;
3684 case Iop_Sub64x2: op = Xsse_SUB64; goto do_SseReRg;
3685 case Iop_QSub8Sx16: op = Xsse_QSUB8S; goto do_SseReRg;
3686 case Iop_QSub16Sx8: op = Xsse_QSUB16S; goto do_SseReRg;
3687 case Iop_QSub8Ux16: op = Xsse_QSUB8U; goto do_SseReRg;
3688 case Iop_QSub16Ux8: op = Xsse_QSUB16U; goto do_SseReRg;
3689 do_SseReRg: {
3690 HReg arg1 = iselVecExpr(env, e->Iex.Binop.arg1);
3691 HReg arg2 = iselVecExpr(env, e->Iex.Binop.arg2);
3692 HReg dst = newVRegV(env);
3693 if (op != Xsse_OR && op != Xsse_AND && op != Xsse_XOR)
3694 REQUIRE_SSE2;
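         /* The pack/unpack cases above set arg1isEReg because those
            insns are not commutative: the IR's first operand has to
            land in the instruction's E (source) position, so the
            initial copy and the operand order are swapped relative to
            the commutative cases. */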
3695 if (arg1isEReg) {
3696 addInstr(env, mk_vMOVsd_RR(arg2, dst));
3697 addInstr(env, X86Instr_SseReRg(op, arg1, dst));
3698 } else {
3699 addInstr(env, mk_vMOVsd_RR(arg1, dst));
3700 addInstr(env, X86Instr_SseReRg(op, arg2, dst));
3701 }
3702 return dst;
3703 }
3704
3705 case Iop_ShlN16x8: op = Xsse_SHL16; goto do_SseShift;
3706 case Iop_ShlN32x4: op = Xsse_SHL32; goto do_SseShift;
3707 case Iop_ShlN64x2: op = Xsse_SHL64; goto do_SseShift;
3708 case Iop_SarN16x8: op = Xsse_SAR16; goto do_SseShift;
3709 case Iop_SarN32x4: op = Xsse_SAR32; goto do_SseShift;
3710 case Iop_ShrN16x8: op = Xsse_SHR16; goto do_SseShift;
3711 case Iop_ShrN32x4: op = Xsse_SHR32; goto do_SseShift;
3712 case Iop_ShrN64x2: op = Xsse_SHR64; goto do_SseShift;
3713 do_SseShift: {
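         /* The SSE2 shift-by-register forms take the count from the
            low 64 bits of an xmm register.  Sketch of the plan: push
            three zero words and then the 32-bit count, so (%esp) holds
            the count zero-extended to 128 bits; load that into ereg,
            apply the shift to a copy of greg, and pop the 16 bytes. */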
3714 HReg greg = iselVecExpr(env, e->Iex.Binop.arg1);
3715 X86RMI* rmi = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
3716 X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
3717 HReg ereg = newVRegV(env);
3718 HReg dst = newVRegV(env);
3719 REQUIRE_SSE2;
3720 addInstr(env, X86Instr_Push(X86RMI_Imm(0)));
3721 addInstr(env, X86Instr_Push(X86RMI_Imm(0)));
3722 addInstr(env, X86Instr_Push(X86RMI_Imm(0)));
3723 addInstr(env, X86Instr_Push(rmi));
3724 addInstr(env, X86Instr_SseLdSt(True/*load*/, ereg, esp0));
3725 addInstr(env, mk_vMOVsd_RR(greg, dst));
3726 addInstr(env, X86Instr_SseReRg(op, ereg, dst));
3727 add_to_esp(env, 16);
3728 return dst;
3729 }
3730
3731 case Iop_NarrowBin32to16x8:
3732 fn = (HWord)h_generic_calc_NarrowBin32to16x8;
3733 goto do_SseAssistedBinary;
3734 case Iop_NarrowBin16to8x16:
3735 fn = (HWord)h_generic_calc_NarrowBin16to8x16;
3736 goto do_SseAssistedBinary;
3737 do_SseAssistedBinary: {
3738 /* As with the amd64 case (where this is copied from) we
3739 generate pretty bad code. */
3740 vassert(fn != 0);
3741 HReg dst = newVRegV(env);
3742 HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
3743 HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
3744 HReg argp = newVRegI(env);
3745 /* subl $112, %esp -- make a space */
3746 sub_from_esp(env, 112);
3747 /* leal 48(%esp), %r_argp -- point into it */
3748 addInstr(env, X86Instr_Lea32(X86AMode_IR(48, hregX86_ESP()),
3749 argp));
3750 /* andl $-16, %r_argp -- 16-align the pointer */
3751 addInstr(env, X86Instr_Alu32R(Xalu_AND,
3752 X86RMI_Imm( ~(UInt)15 ),
3753 argp));
3754 /* Prepare 3 arg regs:
3755 leal 0(%r_argp), %eax
3756 leal 16(%r_argp), %edx
3757 leal 32(%r_argp), %ecx
3758 */
3759 addInstr(env, X86Instr_Lea32(X86AMode_IR(0, argp),
3760 hregX86_EAX()));
3761 addInstr(env, X86Instr_Lea32(X86AMode_IR(16, argp),
3762 hregX86_EDX()));
3763 addInstr(env, X86Instr_Lea32(X86AMode_IR(32, argp),
3764 hregX86_ECX()));
3765 /* Store the two args, at (%edx) and (%ecx):
3766 movupd %argL, 0(%edx)
3767 movupd %argR, 0(%ecx)
3768 */
3769 addInstr(env, X86Instr_SseLdSt(False/*!isLoad*/, argL,
3770 X86AMode_IR(0, hregX86_EDX())));
3771 addInstr(env, X86Instr_SseLdSt(False/*!isLoad*/, argR,
3772 X86AMode_IR(0, hregX86_ECX())));
3773 /* call the helper */
3774 addInstr(env, X86Instr_Call( Xcc_ALWAYS, (Addr32)fn,
3775 3, mk_RetLoc_simple(RLPri_None) ));
3776 /* fetch the result from memory, using %r_argp, which the
3777 register allocator will keep alive across the call. */
3778 addInstr(env, X86Instr_SseLdSt(True/*isLoad*/, dst,
3779 X86AMode_IR(0, argp)));
3780 /* and finally, clear the space */
3781 add_to_esp(env, 112);
3782 return dst;
3783 }
3784
3785 default:
3786 break;
3787 } /* switch (e->Iex.Binop.op) */
3788 } /* if (e->tag == Iex_Binop) */
3789
3790
3791 if (e->tag == Iex_Triop) {
3792 IRTriop *triop = e->Iex.Triop.details;
3793 switch (triop->op) {
3794
3795 case Iop_Add32Fx4: op = Xsse_ADDF; goto do_32Fx4_w_rm;
3796 case Iop_Sub32Fx4: op = Xsse_SUBF; goto do_32Fx4_w_rm;
3797 case Iop_Mul32Fx4: op = Xsse_MULF; goto do_32Fx4_w_rm;
3798 case Iop_Div32Fx4: op = Xsse_DIVF; goto do_32Fx4_w_rm;
3799 do_32Fx4_w_rm:
3800 {
3801 HReg argL = iselVecExpr(env, triop->arg2);
3802 HReg argR = iselVecExpr(env, triop->arg3);
3803 HReg dst = newVRegV(env);
3804 addInstr(env, mk_vMOVsd_RR(argL, dst));
3805 /* XXXROUNDINGFIXME */
3806 /* set roundingmode here */
3807 addInstr(env, X86Instr_Sse32Fx4(op, argR, dst));
3808 return dst;
3809 }
3810
3811 case Iop_Add64Fx2: op = Xsse_ADDF; goto do_64Fx2_w_rm;
3812 case Iop_Sub64Fx2: op = Xsse_SUBF; goto do_64Fx2_w_rm;
3813 case Iop_Mul64Fx2: op = Xsse_MULF; goto do_64Fx2_w_rm;
3814 case Iop_Div64Fx2: op = Xsse_DIVF; goto do_64Fx2_w_rm;
3815 do_64Fx2_w_rm:
3816 {
3817 HReg argL = iselVecExpr(env, triop->arg2);
3818 HReg argR = iselVecExpr(env, triop->arg3);
3819 HReg dst = newVRegV(env);
3820 REQUIRE_SSE2;
3821 addInstr(env, mk_vMOVsd_RR(argL, dst));
3822 /* XXXROUNDINGFIXME */
3823 /* set roundingmode here */
3824 addInstr(env, X86Instr_Sse64Fx2(op, argR, dst));
3825 return dst;
3826 }
3827
3828 default:
3829 break;
3830 } /* switch (triop->op) */
3831 } /* if (e->tag == Iex_Triop) */
3832
3833
3834 if (e->tag == Iex_ITE) { // VFD
3835 HReg r1 = iselVecExpr(env, e->Iex.ITE.iftrue);
3836 HReg r0 = iselVecExpr(env, e->Iex.ITE.iffalse);
3837 HReg dst = newVRegV(env);
3838 addInstr(env, mk_vMOVsd_RR(r1,dst));
3839 X86CondCode cc = iselCondCode(env, e->Iex.ITE.cond);
3840 addInstr(env, X86Instr_SseCMov(cc ^ 1, r0, dst));
3841 return dst;
3842 }
3843
3844 vec_fail:
3845 vex_printf("iselVecExpr (hwcaps = %s): can't reduce\n",
3846 LibVEX_ppVexHwCaps(VexArchX86,env->hwcaps));
3847 ppIRExpr(e);
3848 vpanic("iselVecExpr_wrk");
3849
3850 # undef REQUIRE_SSE1
3851 # undef REQUIRE_SSE2
3852 # undef SSE2_OR_ABOVE
3853 }
3854
3855
3856 /*---------------------------------------------------------*/
3857 /*--- ISEL: Statements ---*/
3858 /*---------------------------------------------------------*/
3859
3860 static void iselStmt ( ISelEnv* env, IRStmt* stmt )
3861 {
3862 if (vex_traceflags & VEX_TRACE_VCODE) {
3863 vex_printf("\n-- ");
3864 ppIRStmt(stmt);
3865 vex_printf("\n");
3866 }
3867
3868 switch (stmt->tag) {
3869
3870 /* --------- STORE --------- */
3871 case Ist_Store: {
3872 IRType tya = typeOfIRExpr(env->type_env, stmt->Ist.Store.addr);
3873 IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.Store.data);
3874 IREndness end = stmt->Ist.Store.end;
3875
3876 if (tya != Ity_I32 || end != Iend_LE)
3877 goto stmt_fail;
3878
3879 if (tyd == Ity_I32) {
3880 X86AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
3881 X86RI* ri = iselIntExpr_RI(env, stmt->Ist.Store.data);
3882 addInstr(env, X86Instr_Alu32M(Xalu_MOV,ri,am));
3883 return;
3884 }
3885 if (tyd == Ity_I8 || tyd == Ity_I16) {
3886 X86AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
3887 HReg r = iselIntExpr_R(env, stmt->Ist.Store.data);
3888 addInstr(env, X86Instr_Store( toUChar(tyd==Ity_I8 ? 1 : 2),
3889 r,am ));
3890 return;
3891 }
3892 if (tyd == Ity_F64) {
3893 X86AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
3894 HReg r = iselDblExpr(env, stmt->Ist.Store.data);
3895 addInstr(env, X86Instr_FpLdSt(False/*store*/, 8, r, am));
3896 return;
3897 }
3898 if (tyd == Ity_F32) {
3899 X86AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
3900 HReg r = iselFltExpr(env, stmt->Ist.Store.data);
3901 addInstr(env, X86Instr_FpLdSt(False/*store*/, 4, r, am));
3902 return;
3903 }
3904 if (tyd == Ity_I64) {
3905 HReg vHi, vLo, rA;
3906 iselInt64Expr(&vHi, &vLo, env, stmt->Ist.Store.data);
3907 rA = iselIntExpr_R(env, stmt->Ist.Store.addr);
3908 addInstr(env, X86Instr_Alu32M(
3909 Xalu_MOV, X86RI_Reg(vLo), X86AMode_IR(0, rA)));
3910 addInstr(env, X86Instr_Alu32M(
3911 Xalu_MOV, X86RI_Reg(vHi), X86AMode_IR(4, rA)));
3912 return;
3913 }
3914 if (tyd == Ity_V128) {
3915 X86AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
3916 HReg r = iselVecExpr(env, stmt->Ist.Store.data);
3917 addInstr(env, X86Instr_SseLdSt(False/*store*/, r, am));
3918 return;
3919 }
3920 break;
3921 }
3922
3923 /* --------- PUT --------- */
3924 case Ist_Put: {
3925 IRType ty = typeOfIRExpr(env->type_env, stmt->Ist.Put.data);
3926 if (ty == Ity_I32) {
3927 /* We're going to write to memory, so compute the RHS into an
3928 X86RI. */
3929 X86RI* ri = iselIntExpr_RI(env, stmt->Ist.Put.data);
3930 addInstr(env,
3931 X86Instr_Alu32M(
3932 Xalu_MOV,
3933 ri,
3934 X86AMode_IR(stmt->Ist.Put.offset,hregX86_EBP())
3935 ));
3936 return;
3937 }
3938 if (ty == Ity_I8 || ty == Ity_I16) {
3939 HReg r = iselIntExpr_R(env, stmt->Ist.Put.data);
3940 addInstr(env, X86Instr_Store(
3941 toUChar(ty==Ity_I8 ? 1 : 2),
3942 r,
3943 X86AMode_IR(stmt->Ist.Put.offset,
3944 hregX86_EBP())));
3945 return;
3946 }
3947 if (ty == Ity_I64) {
3948 HReg vHi, vLo;
3949 X86AMode* am = X86AMode_IR(stmt->Ist.Put.offset, hregX86_EBP());
3950 X86AMode* am4 = advance4(am);
3951 iselInt64Expr(&vHi, &vLo, env, stmt->Ist.Put.data);
3952 addInstr(env, X86Instr_Alu32M( Xalu_MOV, X86RI_Reg(vLo), am ));
3953 addInstr(env, X86Instr_Alu32M( Xalu_MOV, X86RI_Reg(vHi), am4 ));
3954 return;
3955 }
3956 if (ty == Ity_V128) {
3957 HReg vec = iselVecExpr(env, stmt->Ist.Put.data);
3958 X86AMode* am = X86AMode_IR(stmt->Ist.Put.offset, hregX86_EBP());
3959 addInstr(env, X86Instr_SseLdSt(False/*store*/, vec, am));
3960 return;
3961 }
3962 if (ty == Ity_F32) {
3963 HReg f32 = iselFltExpr(env, stmt->Ist.Put.data);
3964 X86AMode* am = X86AMode_IR(stmt->Ist.Put.offset, hregX86_EBP());
3965 set_FPU_rounding_default(env); /* paranoia */
3966 addInstr(env, X86Instr_FpLdSt( False/*store*/, 4, f32, am ));
3967 return;
3968 }
3969 if (ty == Ity_F64) {
3970 HReg f64 = iselDblExpr(env, stmt->Ist.Put.data);
3971 X86AMode* am = X86AMode_IR(stmt->Ist.Put.offset, hregX86_EBP());
3972 set_FPU_rounding_default(env); /* paranoia */
3973 addInstr(env, X86Instr_FpLdSt( False/*store*/, 8, f64, am ));
3974 return;
3975 }
3976 break;
3977 }
3978
3979 /* --------- Indexed PUT --------- */
3980 case Ist_PutI: {
3981 IRPutI *puti = stmt->Ist.PutI.details;
3982
3983 X86AMode* am
3984 = genGuestArrayOffset(
3985 env, puti->descr,
3986 puti->ix, puti->bias );
3987
3988 IRType ty = typeOfIRExpr(env->type_env, puti->data);
3989 if (ty == Ity_F64) {
3990 HReg val = iselDblExpr(env, puti->data);
3991 addInstr(env, X86Instr_FpLdSt( False/*store*/, 8, val, am ));
3992 return;
3993 }
3994 if (ty == Ity_I8) {
3995 HReg r = iselIntExpr_R(env, puti->data);
3996 addInstr(env, X86Instr_Store( 1, r, am ));
3997 return;
3998 }
3999 if (ty == Ity_I32) {
4000 HReg r = iselIntExpr_R(env, puti->data);
4001 addInstr(env, X86Instr_Alu32M( Xalu_MOV, X86RI_Reg(r), am ));
4002 return;
4003 }
4004 if (ty == Ity_I64) {
4005 HReg rHi, rLo;
4006 X86AMode* am4 = advance4(am);
4007 iselInt64Expr(&rHi, &rLo, env, puti->data);
4008 addInstr(env, X86Instr_Alu32M( Xalu_MOV, X86RI_Reg(rLo), am ));
4009 addInstr(env, X86Instr_Alu32M( Xalu_MOV, X86RI_Reg(rHi), am4 ));
4010 return;
4011 }
4012 break;
4013 }
4014
4015 /* --------- TMP --------- */
4016 case Ist_WrTmp: {
4017 IRTemp tmp = stmt->Ist.WrTmp.tmp;
4018 IRType ty = typeOfIRTemp(env->type_env, tmp);
4019
4020 /* optimisation: if stmt->Ist.WrTmp.data is Add32(..,..),
4021 compute it into an AMode and then use LEA. This usually
4022 produces fewer instructions, often because (for memcheck
4023 created IR) we get t = address-expression, (t is later used
4024 twice) and so doing this naturally turns address-expression
4025 back into an X86 amode. */
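         /* A hypothetical illustration: for a statement such as
            t = Add32(t7, Shl32(t8, 2)), iselIntExpr_AMode can produce
            an amode that lets this become a single
               leal 0(t7,t8,4), t
            rather than a move, a shift and an add. */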
4026 if (ty == Ity_I32
4027 && stmt->Ist.WrTmp.data->tag == Iex_Binop
4028 && stmt->Ist.WrTmp.data->Iex.Binop.op == Iop_Add32) {
4029 X86AMode* am = iselIntExpr_AMode(env, stmt->Ist.WrTmp.data);
4030 HReg dst = lookupIRTemp(env, tmp);
4031 if (am->tag == Xam_IR && am->Xam.IR.imm == 0) {
4032 /* Hmm, iselIntExpr_AMode wimped out and just computed the
4033 value into a register. Just emit a normal reg-reg move
4034 so reg-alloc can coalesce it away in the usual way. */
4035 HReg src = am->Xam.IR.reg;
4036 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Reg(src), dst));
4037 } else {
4038 addInstr(env, X86Instr_Lea32(am,dst));
4039 }
4040 return;
4041 }
4042
4043 if (ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8) {
4044 X86RMI* rmi = iselIntExpr_RMI(env, stmt->Ist.WrTmp.data);
4045 HReg dst = lookupIRTemp(env, tmp);
4046 addInstr(env, X86Instr_Alu32R(Xalu_MOV,rmi,dst));
4047 return;
4048 }
4049 if (ty == Ity_I64) {
4050 HReg rHi, rLo, dstHi, dstLo;
4051 iselInt64Expr(&rHi,&rLo, env, stmt->Ist.WrTmp.data);
4052 lookupIRTemp64( &dstHi, &dstLo, env, tmp);
4053 addInstr(env, mk_iMOVsd_RR(rHi,dstHi) );
4054 addInstr(env, mk_iMOVsd_RR(rLo,dstLo) );
4055 return;
4056 }
4057 if (ty == Ity_I1) {
4058 X86CondCode cond = iselCondCode(env, stmt->Ist.WrTmp.data);
4059 HReg dst = lookupIRTemp(env, tmp);
4060 addInstr(env, X86Instr_Set32(cond, dst));
4061 return;
4062 }
4063 if (ty == Ity_F64) {
4064 HReg dst = lookupIRTemp(env, tmp);
4065 HReg src = iselDblExpr(env, stmt->Ist.WrTmp.data);
4066 addInstr(env, X86Instr_FpUnary(Xfp_MOV,src,dst));
4067 return;
4068 }
4069 if (ty == Ity_F32) {
4070 HReg dst = lookupIRTemp(env, tmp);
4071 HReg src = iselFltExpr(env, stmt->Ist.WrTmp.data);
4072 addInstr(env, X86Instr_FpUnary(Xfp_MOV,src,dst));
4073 return;
4074 }
4075 if (ty == Ity_V128) {
4076 HReg dst = lookupIRTemp(env, tmp);
4077 HReg src = iselVecExpr(env, stmt->Ist.WrTmp.data);
4078 addInstr(env, mk_vMOVsd_RR(src,dst));
4079 return;
4080 }
4081 break;
4082 }
4083
4084 /* --------- Call to DIRTY helper --------- */
4085 case Ist_Dirty: {
4086 IRDirty* d = stmt->Ist.Dirty.details;
4087
4088 /* Figure out the return type, if any. */
4089 IRType retty = Ity_INVALID;
4090 if (d->tmp != IRTemp_INVALID)
4091 retty = typeOfIRTemp(env->type_env, d->tmp);
4092
4093 Bool retty_ok = False;
4094 switch (retty) {
4095 case Ity_INVALID: /* function doesn't return anything */
4096 case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8:
4097 case Ity_V128:
4098 retty_ok = True; break;
4099 default:
4100 break;
4101 }
4102 if (!retty_ok)
4103 break; /* will go to stmt_fail: */
4104
4105 /* Marshal args, do the call, and set the return value to
4106 0x555..555 if this is a conditional call that returns a value
4107 and the call is skipped. */
4108 UInt addToSp = 0;
4109 RetLoc rloc = mk_RetLoc_INVALID();
4110 doHelperCall( &addToSp, &rloc, env, d->guard, d->cee, retty, d->args );
4111 vassert(is_sane_RetLoc(rloc));
4112
4113 /* Now figure out what to do with the returned value, if any. */
4114 switch (retty) {
4115 case Ity_INVALID: {
4116 /* No return value. Nothing to do. */
4117 vassert(d->tmp == IRTemp_INVALID);
4118 vassert(rloc.pri == RLPri_None);
4119 vassert(addToSp == 0);
4120 return;
4121 }
4122 case Ity_I32: case Ity_I16: case Ity_I8: {
4123 /* The returned value is in %eax. Park it in the register
4124 associated with tmp. */
4125 vassert(rloc.pri == RLPri_Int);
4126 vassert(addToSp == 0);
4127 HReg dst = lookupIRTemp(env, d->tmp);
4128 addInstr(env, mk_iMOVsd_RR(hregX86_EAX(),dst) );
4129 return;
4130 }
4131 case Ity_I64: {
4132 /* The returned value is in %edx:%eax. Park it in the
4133 register-pair associated with tmp. */
4134 vassert(rloc.pri == RLPri_2Int);
4135 vassert(addToSp == 0);
4136 HReg dstHi, dstLo;
4137 lookupIRTemp64( &dstHi, &dstLo, env, d->tmp);
4138 addInstr(env, mk_iMOVsd_RR(hregX86_EDX(),dstHi) );
4139 addInstr(env, mk_iMOVsd_RR(hregX86_EAX(),dstLo) );
4140 return;
4141 }
4142 case Ity_V128: {
4143 /* The returned value is on the stack, and *retloc tells
4144 us where. Fish it off the stack and then move the
4145 stack pointer upwards to clear it, as directed by
4146 doHelperCall. */
4147 vassert(rloc.pri == RLPri_V128SpRel);
4148 vassert(addToSp >= 16);
4149 HReg dst = lookupIRTemp(env, d->tmp);
4150 X86AMode* am = X86AMode_IR(rloc.spOff, hregX86_ESP());
4151 addInstr(env, X86Instr_SseLdSt( True/*load*/, dst, am ));
4152 add_to_esp(env, addToSp);
4153 return;
4154 }
4155 default:
4156 /*NOTREACHED*/
4157 vassert(0);
4158 }
4159 break;
4160 }
4161
4162 /* --------- MEM FENCE --------- */
4163 case Ist_MBE:
4164 switch (stmt->Ist.MBE.event) {
4165 case Imbe_Fence:
4166 addInstr(env, X86Instr_MFence(env->hwcaps));
4167 return;
4168 default:
4169 break;
4170 }
4171 break;
4172
4173 /* --------- ACAS --------- */
4174 case Ist_CAS:
4175 if (stmt->Ist.CAS.details->oldHi == IRTemp_INVALID) {
4176 /* "normal" singleton CAS */
4177 UChar sz;
4178 IRCAS* cas = stmt->Ist.CAS.details;
4179 IRType ty = typeOfIRExpr(env->type_env, cas->dataLo);
4180 /* get: cas->expdLo into %eax, and cas->dataLo into %ebx */
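            /* In outline, the generated sequence is (a sketch,
               assuming X86Instr_ACAS expands to a LOCK-prefixed
               cmpxchg of the appropriate size):
                  mov   expdLo, oldLo
                  mov   expdLo, %eax
                  mov   dataLo, %ebx
                  lock; cmpxchg %ebx, am      ; or %bx / %bl for sz 2 / 1
                  cmovnz %eax, oldLo          ; on failure %eax holds the
                                              ; value actually in memory
            */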
4181 X86AMode* am = iselIntExpr_AMode(env, cas->addr);
4182 HReg rDataLo = iselIntExpr_R(env, cas->dataLo);
4183 HReg rExpdLo = iselIntExpr_R(env, cas->expdLo);
4184 HReg rOldLo = lookupIRTemp(env, cas->oldLo);
4185 vassert(cas->expdHi == NULL);
4186 vassert(cas->dataHi == NULL);
4187 addInstr(env, mk_iMOVsd_RR(rExpdLo, rOldLo));
4188 addInstr(env, mk_iMOVsd_RR(rExpdLo, hregX86_EAX()));
4189 addInstr(env, mk_iMOVsd_RR(rDataLo, hregX86_EBX()));
4190 switch (ty) {
4191 case Ity_I32: sz = 4; break;
4192 case Ity_I16: sz = 2; break;
4193 case Ity_I8: sz = 1; break;
4194 default: goto unhandled_cas;
4195 }
4196 addInstr(env, X86Instr_ACAS(am, sz));
4197 addInstr(env,
4198 X86Instr_CMov32(Xcc_NZ,
4199 X86RM_Reg(hregX86_EAX()), rOldLo));
4200 return;
4201 } else {
4202 /* double CAS */
4203 IRCAS* cas = stmt->Ist.CAS.details;
4204 IRType ty = typeOfIRExpr(env->type_env, cas->dataLo);
4205 /* only 32-bit allowed in this case */
4206 /* get: cas->expdLo into %eax, and cas->dataLo into %ebx */
4207 /* get: cas->expdHi into %edx, and cas->dataHi into %ecx */
4208 X86AMode* am = iselIntExpr_AMode(env, cas->addr);
4209 HReg rDataHi = iselIntExpr_R(env, cas->dataHi);
4210 HReg rDataLo = iselIntExpr_R(env, cas->dataLo);
4211 HReg rExpdHi = iselIntExpr_R(env, cas->expdHi);
4212 HReg rExpdLo = iselIntExpr_R(env, cas->expdLo);
4213 HReg rOldHi = lookupIRTemp(env, cas->oldHi);
4214 HReg rOldLo = lookupIRTemp(env, cas->oldLo);
4215 if (ty != Ity_I32)
4216 goto unhandled_cas;
4217 addInstr(env, mk_iMOVsd_RR(rExpdHi, rOldHi));
4218 addInstr(env, mk_iMOVsd_RR(rExpdLo, rOldLo));
4219 addInstr(env, mk_iMOVsd_RR(rExpdHi, hregX86_EDX()));
4220 addInstr(env, mk_iMOVsd_RR(rExpdLo, hregX86_EAX()));
4221 addInstr(env, mk_iMOVsd_RR(rDataHi, hregX86_ECX()));
4222 addInstr(env, mk_iMOVsd_RR(rDataLo, hregX86_EBX()));
4223 addInstr(env, X86Instr_DACAS(am));
4224 addInstr(env,
4225 X86Instr_CMov32(Xcc_NZ,
4226 X86RM_Reg(hregX86_EDX()), rOldHi));
4227 addInstr(env,
4228 X86Instr_CMov32(Xcc_NZ,
4229 X86RM_Reg(hregX86_EAX()), rOldLo));
4230 return;
4231 }
4232 unhandled_cas:
4233 break;
4234
4235 /* --------- INSTR MARK --------- */
4236 /* Doesn't generate any executable code ... */
4237 case Ist_IMark:
4238 return;
4239
4240 /* --------- NO-OP --------- */
4241 /* Fairly self-explanatory, wouldn't you say? */
4242 case Ist_NoOp:
4243 return;
4244
4245 /* --------- EXIT --------- */
4246 case Ist_Exit: {
4247 if (stmt->Ist.Exit.dst->tag != Ico_U32)
4248 vpanic("iselStmt(x86): Ist_Exit: dst is not a 32-bit value");
4249
4250 X86CondCode cc = iselCondCode(env, stmt->Ist.Exit.guard);
4251 X86AMode* amEIP = X86AMode_IR(stmt->Ist.Exit.offsIP,
4252 hregX86_EBP());
4253
4254 /* Case: boring transfer to known address */
4255 if (stmt->Ist.Exit.jk == Ijk_Boring) {
4256 if (env->chainingAllowed) {
4257 /* .. almost always true .. */
4258 /* Skip the event check at the dst if this is a forwards
4259 edge. */
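             /* "Forwards edge" here means the destination is above
                max_ga, the highest guest address in this superblock;
                presumably a loop cannot be built purely from forwards
                edges, so any cycle still passes through an event
                check somewhere. */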
4260 Bool toFastEP
4261 = ((Addr32)stmt->Ist.Exit.dst->Ico.U32) > env->max_ga;
4262 if (0) vex_printf("%s", toFastEP ? "Y" : ",");
4263 addInstr(env, X86Instr_XDirect(stmt->Ist.Exit.dst->Ico.U32,
4264 amEIP, cc, toFastEP));
4265 } else {
4266 /* .. very occasionally .. */
4267 /* We can't use chaining, so ask for an assisted transfer,
4268 as that's the only alternative that is allowable. */
4269 HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst));
4270 addInstr(env, X86Instr_XAssisted(r, amEIP, cc, Ijk_Boring));
4271 }
4272 return;
4273 }
4274
4275 /* Case: assisted transfer to arbitrary address */
4276 switch (stmt->Ist.Exit.jk) {
4277 /* Keep this list in sync with that in iselNext below */
4278 case Ijk_ClientReq:
4279 case Ijk_EmWarn:
4280 case Ijk_MapFail:
4281 case Ijk_NoDecode:
4282 case Ijk_NoRedir:
4283 case Ijk_SigSEGV:
4284 case Ijk_SigTRAP:
4285 case Ijk_Sys_int128:
4286 case Ijk_Sys_int129:
4287 case Ijk_Sys_int130:
4288 case Ijk_Sys_int145:
4289 case Ijk_Sys_int210:
4290 case Ijk_Sys_syscall:
4291 case Ijk_Sys_sysenter:
4292 case Ijk_InvalICache:
4293 case Ijk_Yield:
4294 {
4295 HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst));
4296 addInstr(env, X86Instr_XAssisted(r, amEIP, cc, stmt->Ist.Exit.jk));
4297 return;
4298 }
4299 default:
4300 break;
4301 }
4302
4303 /* Do we ever expect to see any other kind? */
4304 goto stmt_fail;
4305 }
4306
4307 default: break;
4308 }
4309 stmt_fail:
4310 ppIRStmt(stmt);
4311 vpanic("iselStmt");
4312 }
4313
4314
4315 /*---------------------------------------------------------*/
4316 /*--- ISEL: Basic block terminators (Nexts) ---*/
4317 /*---------------------------------------------------------*/
4318
4319 static void iselNext ( ISelEnv* env,
4320 IRExpr* next, IRJumpKind jk, Int offsIP )
4321 {
4322 if (vex_traceflags & VEX_TRACE_VCODE) {
4323 vex_printf( "\n-- PUT(%d) = ", offsIP);
4324 ppIRExpr( next );
4325 vex_printf( "; exit-");
4326 ppIRJumpKind(jk);
4327 vex_printf( "\n");
4328 }
4329
4330 /* Case: boring transfer to known address */
4331 if (next->tag == Iex_Const) {
4332 IRConst* cdst = next->Iex.Const.con;
4333 vassert(cdst->tag == Ico_U32);
4334 if (jk == Ijk_Boring || jk == Ijk_Call) {
4335 /* Boring transfer to known address */
4336 X86AMode* amEIP = X86AMode_IR(offsIP, hregX86_EBP());
4337 if (env->chainingAllowed) {
4338 /* .. almost always true .. */
4339 /* Skip the event check at the dst if this is a forwards
4340 edge. */
4341 Bool toFastEP
4342 = ((Addr32)cdst->Ico.U32) > env->max_ga;
4343 if (0) vex_printf("%s", toFastEP ? "X" : ".");
4344 addInstr(env, X86Instr_XDirect(cdst->Ico.U32,
4345 amEIP, Xcc_ALWAYS,
4346 toFastEP));
4347 } else {
4348 /* .. very occasionally .. */
4349 /* We can't use chaining, so ask for an assisted transfer,
4350 as that's the only alternative that is allowable. */
4351 HReg r = iselIntExpr_R(env, next);
4352 addInstr(env, X86Instr_XAssisted(r, amEIP, Xcc_ALWAYS,
4353 Ijk_Boring));
4354 }
4355 return;
4356 }
4357 }
4358
4359 /* Case: call/return (==boring) transfer to any address */
4360 switch (jk) {
4361 case Ijk_Boring: case Ijk_Ret: case Ijk_Call: {
4362 HReg r = iselIntExpr_R(env, next);
4363 X86AMode* amEIP = X86AMode_IR(offsIP, hregX86_EBP());
4364 if (env->chainingAllowed) {
4365 addInstr(env, X86Instr_XIndir(r, amEIP, Xcc_ALWAYS));
4366 } else {
4367 addInstr(env, X86Instr_XAssisted(r, amEIP, Xcc_ALWAYS,
4368 Ijk_Boring));
4369 }
4370 return;
4371 }
4372 default:
4373 break;
4374 }
4375
4376 /* Case: assisted transfer to arbitrary address */
4377 switch (jk) {
4378 /* Keep this list in sync with that for Ist_Exit above */
4379 case Ijk_ClientReq:
4380 case Ijk_EmWarn:
4381 case Ijk_MapFail:
4382 case Ijk_NoDecode:
4383 case Ijk_NoRedir:
4384 case Ijk_SigSEGV:
4385 case Ijk_SigTRAP:
4386 case Ijk_Sys_int128:
4387 case Ijk_Sys_int129:
4388 case Ijk_Sys_int130:
4389 case Ijk_Sys_int145:
4390 case Ijk_Sys_int210:
4391 case Ijk_Sys_syscall:
4392 case Ijk_Sys_sysenter:
4393 case Ijk_InvalICache:
4394 case Ijk_Yield:
4395 {
4396 HReg r = iselIntExpr_R(env, next);
4397 X86AMode* amEIP = X86AMode_IR(offsIP, hregX86_EBP());
4398 addInstr(env, X86Instr_XAssisted(r, amEIP, Xcc_ALWAYS, jk));
4399 return;
4400 }
4401 default:
4402 break;
4403 }
4404
4405 vex_printf( "\n-- PUT(%d) = ", offsIP);
4406 ppIRExpr( next );
4407 vex_printf( "; exit-");
4408 ppIRJumpKind(jk);
4409 vex_printf( "\n");
4410 vassert(0); // are we expecting any other kind?
4411 }
4412
4413
4414 /*---------------------------------------------------------*/
4415 /*--- Insn selector top-level ---*/
4416 /*---------------------------------------------------------*/
4417
4418 /* Translate an entire SB to x86 code. */
4419
4420 HInstrArray* iselSB_X86 ( const IRSB* bb,
4421 VexArch arch_host,
4422 const VexArchInfo* archinfo_host,
4423 const VexAbiInfo* vbi/*UNUSED*/,
4424 Int offs_Host_EvC_Counter,
4425 Int offs_Host_EvC_FailAddr,
4426 Bool chainingAllowed,
4427 Bool addProfInc,
4428 Addr max_ga )
4429 {
4430 Int i, j;
4431 HReg hreg, hregHI;
4432 ISelEnv* env;
4433 UInt hwcaps_host = archinfo_host->hwcaps;
4434 X86AMode *amCounter, *amFailAddr;
4435
4436 /* sanity ... */
4437 vassert(arch_host == VexArchX86);
4438 vassert(0 == (hwcaps_host
4439 & ~(VEX_HWCAPS_X86_MMXEXT
4440 | VEX_HWCAPS_X86_SSE1
4441 | VEX_HWCAPS_X86_SSE2
4442 | VEX_HWCAPS_X86_SSE3
4443 | VEX_HWCAPS_X86_LZCNT)));
4444
4445 /* Check that the host's endianness is as expected. */
4446 vassert(archinfo_host->endness == VexEndnessLE);
4447
4448 /* Make up an initial environment to use. */
4449 env = LibVEX_Alloc_inline(sizeof(ISelEnv));
4450 env->vreg_ctr = 0;
4451
4452 /* Set up output code array. */
4453 env->code = newHInstrArray();
4454
4455 /* Copy BB's type env. */
4456 env->type_env = bb->tyenv;
4457
4458 /* Make up an IRTemp -> virtual HReg mapping. This doesn't
4459 change as we go along. */
4460 env->n_vregmap = bb->tyenv->types_used;
4461 env->vregmap = LibVEX_Alloc_inline(env->n_vregmap * sizeof(HReg));
4462 env->vregmapHI = LibVEX_Alloc_inline(env->n_vregmap * sizeof(HReg));
4463
4464 /* and finally ... */
4465 env->chainingAllowed = chainingAllowed;
4466 env->hwcaps = hwcaps_host;
4467 env->max_ga = max_ga;
4468
4469 /* For each IR temporary, allocate a suitably-kinded virtual
4470 register. */
4471 j = 0;
4472 for (i = 0; i < env->n_vregmap; i++) {
4473 hregHI = hreg = INVALID_HREG;
4474 switch (bb->tyenv->types[i]) {
4475 case Ity_I1:
4476 case Ity_I8:
4477 case Ity_I16:
4478 case Ity_I32: hreg = mkHReg(True, HRcInt32, 0, j++); break;
4479 case Ity_I64: hreg = mkHReg(True, HRcInt32, 0, j++);
4480 hregHI = mkHReg(True, HRcInt32, 0, j++); break;
4481 case Ity_F32:
4482 case Ity_F64: hreg = mkHReg(True, HRcFlt64, 0, j++); break;
4483 case Ity_V128: hreg = mkHReg(True, HRcVec128, 0, j++); break;
4484 default: ppIRType(bb->tyenv->types[i]);
4485 vpanic("iselBB: IRTemp type");
4486 }
4487 env->vregmap[i] = hreg;
4488 env->vregmapHI[i] = hregHI;
4489 }
4490 env->vreg_ctr = j;
4491
4492 /* The very first instruction must be an event check. */
4493 amCounter = X86AMode_IR(offs_Host_EvC_Counter, hregX86_EBP());
4494 amFailAddr = X86AMode_IR(offs_Host_EvC_FailAddr, hregX86_EBP());
4495 addInstr(env, X86Instr_EvCheck(amCounter, amFailAddr));
4496
4497 /* Possibly a block counter increment (for profiling). At this
4498 point we don't know the address of the counter, so just pretend
4499 it is zero. It will have to be patched later, but before this
4500 translation is used, by a call to LibVEX_patchProfCtr. */
4501 if (addProfInc) {
4502 addInstr(env, X86Instr_ProfInc());
4503 }
4504
4505 /* Ok, finally we can iterate over the statements. */
4506 for (i = 0; i < bb->stmts_used; i++)
4507 iselStmt(env, bb->stmts[i]);
4508
4509 iselNext(env, bb->next, bb->jumpkind, bb->offsIP);
4510
4511 /* record the number of vregs we used. */
4512 env->code->n_vregs = env->vreg_ctr;
4513 return env->code;
4514 }
4515
4516
4517 /*---------------------------------------------------------------*/
4518 /*--- end host_x86_isel.c ---*/
4519 /*---------------------------------------------------------------*/
4520