1
2 /*--------------------------------------------------------------------*/
3 /*--- begin guest_generic_bb_to_IR.c ---*/
4 /*--------------------------------------------------------------------*/
5
6 /*
7 This file is part of Valgrind, a dynamic binary instrumentation
8 framework.
9
10 Copyright (C) 2004-2017 OpenWorks LLP
11 info@open-works.net
12
13 This program is free software; you can redistribute it and/or
14 modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2 of the
16 License, or (at your option) any later version.
17
18 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
22
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, write to the Free Software
25 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
26 02110-1301, USA.
27
28 The GNU General Public License is contained in the file COPYING.
29
30 Neither the names of the U.S. Department of Energy nor the
31 University of California nor the names of its contributors may be
32 used to endorse or promote products derived from this software
33 without prior written permission.
34 */
35
36 #include "libvex_basictypes.h"
37 #include "libvex_ir.h"
38 #include "libvex.h"
39 #include "main_util.h"
40 #include "main_globals.h"
41 #include "guest_generic_bb_to_IR.h"
42
43
44 /* Forwards .. */
45 VEX_REGPARM(2)
46 static UInt genericg_compute_checksum_4al ( HWord first_w32, HWord n_w32s );
47 VEX_REGPARM(1)
48 static UInt genericg_compute_checksum_4al_1 ( HWord first_w32 );
49 VEX_REGPARM(1)
50 static UInt genericg_compute_checksum_4al_2 ( HWord first_w32 );
51 VEX_REGPARM(1)
52 static UInt genericg_compute_checksum_4al_3 ( HWord first_w32 );
53 VEX_REGPARM(1)
54 static UInt genericg_compute_checksum_4al_4 ( HWord first_w32 );
55 VEX_REGPARM(1)
56 static UInt genericg_compute_checksum_4al_5 ( HWord first_w32 );
57 VEX_REGPARM(1)
58 static UInt genericg_compute_checksum_4al_6 ( HWord first_w32 );
59 VEX_REGPARM(1)
60 static UInt genericg_compute_checksum_4al_7 ( HWord first_w32 );
61 VEX_REGPARM(1)
62 static UInt genericg_compute_checksum_4al_8 ( HWord first_w32 );
63 VEX_REGPARM(1)
64 static UInt genericg_compute_checksum_4al_9 ( HWord first_w32 );
65 VEX_REGPARM(1)
66 static UInt genericg_compute_checksum_4al_10 ( HWord first_w32 );
67 VEX_REGPARM(1)
68 static UInt genericg_compute_checksum_4al_11 ( HWord first_w32 );
69 VEX_REGPARM(1)
70 static UInt genericg_compute_checksum_4al_12 ( HWord first_w32 );
71
72 VEX_REGPARM(2)
73 static ULong genericg_compute_checksum_8al ( HWord first_w64, HWord n_w64s );
74 VEX_REGPARM(1)
75 static ULong genericg_compute_checksum_8al_1 ( HWord first_w64 );
76 VEX_REGPARM(1)
77 static ULong genericg_compute_checksum_8al_2 ( HWord first_w64 );
78 VEX_REGPARM(1)
79 static ULong genericg_compute_checksum_8al_3 ( HWord first_w64 );
80 VEX_REGPARM(1)
81 static ULong genericg_compute_checksum_8al_4 ( HWord first_w64 );
82 VEX_REGPARM(1)
83 static ULong genericg_compute_checksum_8al_5 ( HWord first_w64 );
84 VEX_REGPARM(1)
85 static ULong genericg_compute_checksum_8al_6 ( HWord first_w64 );
86 VEX_REGPARM(1)
87 static ULong genericg_compute_checksum_8al_7 ( HWord first_w64 );
88 VEX_REGPARM(1)
89 static ULong genericg_compute_checksum_8al_8 ( HWord first_w64 );
90 VEX_REGPARM(1)
91 static ULong genericg_compute_checksum_8al_9 ( HWord first_w64 );
92 VEX_REGPARM(1)
93 static ULong genericg_compute_checksum_8al_10 ( HWord first_w64 );
94 VEX_REGPARM(1)
95 static ULong genericg_compute_checksum_8al_11 ( HWord first_w64 );
96 VEX_REGPARM(1)
97 static ULong genericg_compute_checksum_8al_12 ( HWord first_w64 );
98
99 /* Small helpers */
const_False(void * callback_opaque,Addr a)100 static Bool const_False ( void* callback_opaque, Addr a ) {
101 return False;
102 }
103
104 /* Disassemble a complete basic block, starting at guest_IP_start,
105 returning a new IRSB. The disassembler may chase across basic
106 block boundaries if it wishes and if chase_into_ok allows it.
107 The precise guest address ranges from which code has been taken
108 are written into vge. guest_IP_bbstart is taken to be the IP in
109 the guest's address space corresponding to the instruction at
110 &guest_code[0].
111
112 dis_instr_fn is the arch-specific fn to disassemble on function; it
113 is this that does the real work.
114
115 needs_self_check is a callback used to ask the caller which of the
116 extents, if any, a self check is required for. The returned value
117 is a bitmask with a 1 in position i indicating that the i'th extent
118 needs a check. Since there can be at most 3 extents, the returned
119 values must be between 0 and 7.
120
121 The number of extents which did get a self check (0 to 3) is put in
122 n_sc_extents. The caller already knows this because it told us
123 which extents to add checks for, via the needs_self_check callback,
124 but we ship the number back out here for the caller's convenience.
125
126 preamble_function is a callback which allows the caller to add
127 its own IR preamble (following the self-check, if any). May be
128 NULL. If non-NULL, the IRSB under construction is handed to
129 this function, which presumably adds IR statements to it. The
130 callback may optionally complete the block and direct bb_to_IR
131 not to disassemble any instructions into it; this is indicated
132 by the callback returning True.
133
134 offB_CMADDR and offB_CMLEN are the offsets of guest_CMADDR and
135 guest_CMLEN. Since this routine has to work for any guest state,
136 without knowing what it is, those offsets have to passed in.
137
138 callback_opaque is a caller-supplied pointer to data which the
139 callbacks may want to see. Vex has no idea what it is.
140 (In fact it's a VgInstrumentClosure.)
141 */
142
143 /* Regarding IP updating. dis_instr_fn (that does the guest specific
144 work of disassembling an individual instruction) must finish the
145 resulting IR with "PUT(guest_IP) = ". Hence in all cases it must
146 state the next instruction address.
147
148 If the block is to be ended at that point, then this routine
149 (bb_to_IR) will set up the next/jumpkind/offsIP fields so as to
150 make a transfer (of the right kind) to "GET(guest_IP)". Hence if
151 dis_instr_fn generates incorrect IP updates we will see it
152 immediately (due to jumping to the wrong next guest address).
153
154 However it is also necessary to set this up so it can be optimised
155 nicely. The IRSB exit is defined to update the guest IP, so that
156 chaining works -- since the chain_me stubs expect the chain-to
157 address to be in the guest state. Hence what the IRSB next fields
158 will contain initially is (implicitly)
159
160 PUT(guest_IP) [implicitly] = GET(guest_IP) [explicit expr on ::next]
161
162 which looks pretty strange at first. Eg so unconditional branch
163 to some address 0x123456 looks like this:
164
165 PUT(guest_IP) = 0x123456; // dis_instr_fn generates this
166 // the exit
167 PUT(guest_IP) [implicitly] = GET(guest_IP); exit-Boring
168
169 after redundant-GET and -PUT removal by iropt, we get what we want:
170
171 // the exit
172 PUT(guest_IP) [implicitly] = 0x123456; exit-Boring
173
174 This makes the IRSB-end case the same as the side-exit case: update
175 IP, then transfer. There is no redundancy of representation for
176 the destination, and we use the destination specified by
177 dis_instr_fn, so any errors it makes show up sooner.
178 */
179
bb_to_IR(VexGuestExtents * vge,UInt * n_sc_extents,UInt * n_guest_instrs,VexRegisterUpdates * pxControl,void * callback_opaque,DisOneInstrFn dis_instr_fn,const UChar * guest_code,Addr guest_IP_bbstart,Bool (* chase_into_ok)(void *,Addr),VexEndness host_endness,Bool sigill_diag,VexArch arch_guest,const VexArchInfo * archinfo_guest,const VexAbiInfo * abiinfo_both,IRType guest_word_type,UInt (* needs_self_check)(void *,VexRegisterUpdates *,const VexGuestExtents *),Bool (* preamble_function)(void *,IRSB *),Int offB_GUEST_CMSTART,Int offB_GUEST_CMLEN,Int offB_GUEST_IP,Int szB_GUEST_IP)180 IRSB* bb_to_IR (
181 /*OUT*/VexGuestExtents* vge,
182 /*OUT*/UInt* n_sc_extents,
183 /*OUT*/UInt* n_guest_instrs, /* stats only */
184 /*MOD*/VexRegisterUpdates* pxControl,
185 /*IN*/ void* callback_opaque,
186 /*IN*/ DisOneInstrFn dis_instr_fn,
187 /*IN*/ const UChar* guest_code,
188 /*IN*/ Addr guest_IP_bbstart,
189 /*IN*/ Bool (*chase_into_ok)(void*,Addr),
190 /*IN*/ VexEndness host_endness,
191 /*IN*/ Bool sigill_diag,
192 /*IN*/ VexArch arch_guest,
193 /*IN*/ const VexArchInfo* archinfo_guest,
194 /*IN*/ const VexAbiInfo* abiinfo_both,
195 /*IN*/ IRType guest_word_type,
196 /*IN*/ UInt (*needs_self_check)
197 (void*, /*MB_MOD*/VexRegisterUpdates*,
198 const VexGuestExtents*),
199 /*IN*/ Bool (*preamble_function)(void*,IRSB*),
200 /*IN*/ Int offB_GUEST_CMSTART,
201 /*IN*/ Int offB_GUEST_CMLEN,
202 /*IN*/ Int offB_GUEST_IP,
203 /*IN*/ Int szB_GUEST_IP
204 )
205 {
206 Long delta;
207 Int i, n_instrs, first_stmt_idx;
208 Bool resteerOK, debug_print;
209 DisResult dres;
210 IRStmt* imark;
211 IRStmt* nop;
212 static Int n_resteers = 0;
213 Int d_resteers = 0;
214 Int selfcheck_idx = 0;
215 IRSB* irsb;
216 Addr guest_IP_curr_instr;
217 IRConst* guest_IP_bbstart_IRConst = NULL;
218 Int n_cond_resteers_allowed = 2;
219
220 Bool (*resteerOKfn)(void*,Addr) = NULL;
221
222 debug_print = toBool(vex_traceflags & VEX_TRACE_FE);
223
224 /* check sanity .. */
225 vassert(sizeof(HWord) == sizeof(void*));
226 vassert(vex_control.guest_max_insns >= 1);
227 vassert(vex_control.guest_max_insns <= 100);
228 vassert(vex_control.guest_chase_thresh >= 0);
229 vassert(vex_control.guest_chase_thresh < vex_control.guest_max_insns);
230 vassert(guest_word_type == Ity_I32 || guest_word_type == Ity_I64);
231
232 if (guest_word_type == Ity_I32) {
233 vassert(szB_GUEST_IP == 4);
234 vassert((offB_GUEST_IP % 4) == 0);
235 } else {
236 vassert(szB_GUEST_IP == 8);
237 vassert((offB_GUEST_IP % 8) == 0);
238 }
239
240 /* Although we will try to disassemble up to vex_control.guest_max_insns
241 insns into the block, the individual insn assemblers may hint to us that a
242 disassembled instruction is verbose. In that case we will lower the limit
243 so as to ensure that the JIT doesn't run out of space. See bug 375839 for
244 the motivating example. */
245 Int guest_max_insns_really = vex_control.guest_max_insns;
246
247 /* Start a new, empty extent. */
248 vge->n_used = 1;
249 vge->base[0] = guest_IP_bbstart;
250 vge->len[0] = 0;
251 *n_sc_extents = 0;
252
253 /* And a new IR superblock to dump the result into. */
254 irsb = emptyIRSB();
255
256 /* Delta keeps track of how far along the guest_code array we have
257 so far gone. */
258 delta = 0;
259 n_instrs = 0;
260 *n_guest_instrs = 0;
261
262 /* Guest addresses as IRConsts. Used in self-checks to specify the
263 restart-after-discard point. */
264 guest_IP_bbstart_IRConst
265 = guest_word_type==Ity_I32
266 ? IRConst_U32(toUInt(guest_IP_bbstart))
267 : IRConst_U64(guest_IP_bbstart);
268
269 /* Leave 15 spaces in which to put the check statements for a self
270 checking translation (up to 3 extents, and 5 stmts required for
271 each). We won't know until later the extents and checksums of
272 the areas, if any, that need to be checked. */
273 nop = IRStmt_NoOp();
274 selfcheck_idx = irsb->stmts_used;
275 for (i = 0; i < 3 * 5; i++)
276 addStmtToIRSB( irsb, nop );
277
278 /* If the caller supplied a function to add its own preamble, use
279 it now. */
280 if (preamble_function) {
281 Bool stopNow = preamble_function( callback_opaque, irsb );
282 if (stopNow) {
283 /* The callback has completed the IR block without any guest
284 insns being disassembled into it, so just return it at
285 this point, even if a self-check was requested - as there
286 is nothing to self-check. The 15 self-check no-ops will
287 still be in place, but they are harmless. */
288 return irsb;
289 }
290 }
291
292 /* Process instructions. */
293 while (True) {
294 vassert(n_instrs < guest_max_insns_really);
295
296 /* Regardless of what chase_into_ok says, is chasing permissible
297 at all right now? Set resteerOKfn accordingly. */
298 resteerOK
299 = toBool(
300 n_instrs < vex_control.guest_chase_thresh
301 /* we can't afford to have a resteer once we're on the
302 last extent slot. */
303 && vge->n_used < 3
304 );
305
306 resteerOKfn
307 = resteerOK ? chase_into_ok : const_False;
308
309 /* n_cond_resteers_allowed keeps track of whether we're still
310 allowing dis_instr_fn to chase conditional branches. It
311 starts (at 2) and gets decremented each time dis_instr_fn
312 tells us it has chased a conditional branch. We then
313 decrement it, and use it to tell later calls to dis_instr_fn
314 whether or not it is allowed to chase conditional
315 branches. */
316 vassert(n_cond_resteers_allowed >= 0 && n_cond_resteers_allowed <= 2);
317
318 /* This is the IP of the instruction we're just about to deal
319 with. */
320 guest_IP_curr_instr = guest_IP_bbstart + delta;
321
322 /* This is the irsb statement array index of the first stmt in
323 this insn. That will always be the instruction-mark
324 descriptor. */
325 first_stmt_idx = irsb->stmts_used;
326
327 /* Add an instruction-mark statement. We won't know until after
328 disassembling the instruction how long it instruction is, so
329 just put in a zero length and we'll fix it up later.
330
331 On ARM, the least significant bit of the instr address
332 distinguishes ARM vs Thumb instructions. All instructions
333 actually start on at least 2-aligned addresses. So we need
334 to ignore the bottom bit of the insn address when forming the
335 IMark's address field, but put that bottom bit in the delta
336 field, so that comparisons against guest_R15T for Thumb can
337 be done correctly. By inspecting the delta field,
338 instruction processors can determine whether the instruction
339 was originally Thumb or ARM. For more details of this
340 convention, see comments on definition of guest_R15T in
341 libvex_guest_arm.h. */
342 if (arch_guest == VexArchARM && (guest_IP_curr_instr & 1)) {
343 /* Thumb insn => mask out the T bit, but put it in delta */
344 addStmtToIRSB( irsb,
345 IRStmt_IMark(guest_IP_curr_instr & ~(Addr)1,
346 0, /* len */
347 1 /* delta */
348 )
349 );
350 } else {
351 /* All other targets: store IP as-is, and set delta to zero. */
352 addStmtToIRSB( irsb,
353 IRStmt_IMark(guest_IP_curr_instr,
354 0, /* len */
355 0 /* delta */
356 )
357 );
358 }
359
360 if (debug_print && n_instrs > 0)
361 vex_printf("\n");
362
363 /* Finally, actually disassemble an instruction. */
364 vassert(irsb->next == NULL);
365 dres = dis_instr_fn ( irsb,
366 resteerOKfn,
367 toBool(n_cond_resteers_allowed > 0),
368 callback_opaque,
369 guest_code,
370 delta,
371 guest_IP_curr_instr,
372 arch_guest,
373 archinfo_guest,
374 abiinfo_both,
375 host_endness,
376 sigill_diag );
377
378 /* stay sane ... */
379 vassert(dres.whatNext == Dis_StopHere
380 || dres.whatNext == Dis_Continue
381 || dres.whatNext == Dis_ResteerU
382 || dres.whatNext == Dis_ResteerC);
383 /* ... disassembled insn length is sane ... */
384 vassert(dres.len >= 0 && dres.len <= 24);
385 /* ... continueAt is zero if no resteer requested ... */
386 if (dres.whatNext != Dis_ResteerU && dres.whatNext != Dis_ResteerC)
387 vassert(dres.continueAt == 0);
388 /* ... if we disallowed conditional resteers, check that one
389 didn't actually happen anyway ... */
390 if (n_cond_resteers_allowed == 0)
391 vassert(dres.whatNext != Dis_ResteerC);
392
393 /* If the disassembly function passed us a hint, take note of it. */
394 if (LIKELY(dres.hint == Dis_HintNone)) {
395 /* Do nothing */
396 } else {
397 vassert(dres.hint == Dis_HintVerbose);
398 /* The current insn is known to be verbose. Lower the max insns limit
399 if necessary so as to avoid running the JIT out of space in the
400 event that we've encountered the start of a long sequence of them.
401 This is expected to be a very rare event. In any case the remaining
402 limit (30 insns) is still so high that most blocks will terminate
403 anyway before then. So this is very unlikely to give a perf hit in
404 practice. See bug 375839 for the motivating example. */
405 if (guest_max_insns_really > 30) {
406 guest_max_insns_really = 30;
407 }
408 }
409
410 /* Fill in the insn-mark length field. */
411 vassert(first_stmt_idx >= 0 && first_stmt_idx < irsb->stmts_used);
412 imark = irsb->stmts[first_stmt_idx];
413 vassert(imark);
414 vassert(imark->tag == Ist_IMark);
415 vassert(imark->Ist.IMark.len == 0);
416 imark->Ist.IMark.len = dres.len;
417
418 /* Print the resulting IR, if needed. */
419 if (vex_traceflags & VEX_TRACE_FE) {
420 for (i = first_stmt_idx; i < irsb->stmts_used; i++) {
421 vex_printf(" ");
422 ppIRStmt(irsb->stmts[i]);
423 vex_printf("\n");
424 }
425 }
426
427 /* Individual insn disassembly may not mess with irsb->next.
428 This function is the only place where it can be set. */
429 vassert(irsb->next == NULL);
430 vassert(irsb->jumpkind == Ijk_Boring);
431 vassert(irsb->offsIP == 0);
432
433 /* Individual insn disassembly must finish the IR for each
434 instruction with an assignment to the guest PC. */
435 vassert(first_stmt_idx < irsb->stmts_used);
436 /* it follows that irsb->stmts_used must be > 0 */
437 { IRStmt* st = irsb->stmts[irsb->stmts_used-1];
438 vassert(st);
439 vassert(st->tag == Ist_Put);
440 vassert(st->Ist.Put.offset == offB_GUEST_IP);
441 /* Really we should also check that the type of the Put'd data
442 == guest_word_type, but that's a bit expensive. */
443 }
444
445 /* Update the VexGuestExtents we are constructing. */
446 /* If vex_control.guest_max_insns is required to be < 100 and
447 each insn is at max 20 bytes long, this limit of 5000 then
448 seems reasonable since the max possible extent length will be
449 100 * 20 == 2000. */
450 vassert(vge->len[vge->n_used-1] < 5000);
451 vge->len[vge->n_used-1]
452 = toUShort(toUInt( vge->len[vge->n_used-1] + dres.len ));
453 n_instrs++;
454
455 /* Advance delta (inconspicuous but very important :-) */
456 delta += (Long)dres.len;
457
458 switch (dres.whatNext) {
459 case Dis_Continue:
460 vassert(dres.continueAt == 0);
461 vassert(dres.jk_StopHere == Ijk_INVALID);
462 if (n_instrs < guest_max_insns_really) {
463 /* keep going */
464 } else {
465 /* We have to stop. See comment above re irsb field
466 settings here. */
467 irsb->next = IRExpr_Get(offB_GUEST_IP, guest_word_type);
468 /* irsb->jumpkind must already by Ijk_Boring */
469 irsb->offsIP = offB_GUEST_IP;
470 goto done;
471 }
472 break;
473 case Dis_StopHere:
474 vassert(dres.continueAt == 0);
475 vassert(dres.jk_StopHere != Ijk_INVALID);
476 /* See comment above re irsb field settings here. */
477 irsb->next = IRExpr_Get(offB_GUEST_IP, guest_word_type);
478 irsb->jumpkind = dres.jk_StopHere;
479 irsb->offsIP = offB_GUEST_IP;
480 goto done;
481
482 case Dis_ResteerU:
483 case Dis_ResteerC:
484 /* Check that we actually allowed a resteer .. */
485 vassert(resteerOK);
486 if (dres.whatNext == Dis_ResteerC) {
487 vassert(n_cond_resteers_allowed > 0);
488 n_cond_resteers_allowed--;
489 }
490 /* figure out a new delta to continue at. */
491 vassert(resteerOKfn(callback_opaque,dres.continueAt));
492 delta = dres.continueAt - guest_IP_bbstart;
493 /* we now have to start a new extent slot. */
494 vge->n_used++;
495 vassert(vge->n_used <= 3);
496 vge->base[vge->n_used-1] = dres.continueAt;
497 vge->len[vge->n_used-1] = 0;
498 n_resteers++;
499 d_resteers++;
500 if (0 && (n_resteers & 0xFF) == 0)
501 vex_printf("resteer[%d,%d] to 0x%lx (delta = %lld)\n",
502 n_resteers, d_resteers,
503 dres.continueAt, delta);
504 break;
505 default:
506 vpanic("bb_to_IR");
507 }
508 }
509 /*NOTREACHED*/
510 vassert(0);
511
512 done:
513 /* We're done. The only thing that might need attending to is that
514 a self-checking preamble may need to be created. If so it gets
515 placed in the 15 slots reserved above.
516
517 The scheme is to compute a rather crude checksum of the code
518 we're making a translation of, and add to the IR a call to a
519 helper routine which recomputes the checksum every time the
520 translation is run, and requests a retranslation if it doesn't
521 match. This is obviously very expensive and considerable
522 efforts are made to speed it up:
523
524 * the checksum is computed from all the naturally aligned
525 host-sized words that overlap the translated code. That means
526 it could depend on up to 7 bytes before and 7 bytes after
527 which aren't part of the translated area, and so if those
528 change then we'll unnecessarily have to discard and
529 retranslate. This seems like a pretty remote possibility and
530 it seems as if the benefit of not having to deal with the ends
531 of the range at byte precision far outweigh any possible extra
532 translations needed.
533
534 * there's a generic routine and 12 specialised cases, which
535 handle the cases of 1 through 12-word lengths respectively.
536 They seem to cover about 90% of the cases that occur in
537 practice.
538
539 We ask the caller, via needs_self_check, which of the 3 vge
540 extents needs a check, and only generate check code for those
541 that do.
542 */
543 {
544 Addr base2check;
545 UInt len2check;
546 HWord expectedhW;
547 IRTemp tistart_tmp, tilen_tmp;
548 HWord VEX_REGPARM(2) (*fn_generic)(HWord, HWord);
549 HWord VEX_REGPARM(1) (*fn_spec)(HWord);
550 const HChar* nm_generic;
551 const HChar* nm_spec;
552 HWord fn_generic_entry = 0;
553 HWord fn_spec_entry = 0;
554 UInt host_word_szB = sizeof(HWord);
555 IRType host_word_type = Ity_INVALID;
556
557 UInt extents_needing_check
558 = needs_self_check(callback_opaque, pxControl, vge);
559
560 if (host_word_szB == 4) host_word_type = Ity_I32;
561 if (host_word_szB == 8) host_word_type = Ity_I64;
562 vassert(host_word_type != Ity_INVALID);
563
564 vassert(vge->n_used >= 1 && vge->n_used <= 3);
565
566 /* Caller shouldn't claim that nonexistent extents need a
567 check. */
568 vassert((extents_needing_check >> vge->n_used) == 0);
569
570 for (i = 0; i < vge->n_used; i++) {
571
572 /* Do we need to generate a check for this extent? */
573 if ((extents_needing_check & (1 << i)) == 0)
574 continue;
575
576 /* Tell the caller */
577 (*n_sc_extents)++;
578
579 /* the extent we're generating a check for */
580 base2check = vge->base[i];
581 len2check = vge->len[i];
582
583 /* stay sane */
584 vassert(len2check >= 0 && len2check < 1000/*arbitrary*/);
585
586 /* Skip the check if the translation involved zero bytes */
587 if (len2check == 0)
588 continue;
589
590 HWord first_hW = ((HWord)base2check)
591 & ~(HWord)(host_word_szB-1);
592 HWord last_hW = (((HWord)base2check) + len2check - 1)
593 & ~(HWord)(host_word_szB-1);
594 vassert(first_hW <= last_hW);
595 HWord hW_diff = last_hW - first_hW;
596 vassert(0 == (hW_diff & (host_word_szB-1)));
597 HWord hWs_to_check = (hW_diff + host_word_szB) / host_word_szB;
598 vassert(hWs_to_check > 0
599 && hWs_to_check < 1004/*arbitrary*/ / host_word_szB);
600
601 /* vex_printf("%lx %lx %ld\n", first_hW, last_hW, hWs_to_check); */
602
603 if (host_word_szB == 8) {
604 fn_generic = (VEX_REGPARM(2) HWord(*)(HWord, HWord))
605 genericg_compute_checksum_8al;
606 nm_generic = "genericg_compute_checksum_8al";
607 } else {
608 fn_generic = (VEX_REGPARM(2) HWord(*)(HWord, HWord))
609 genericg_compute_checksum_4al;
610 nm_generic = "genericg_compute_checksum_4al";
611 }
612
613 fn_spec = NULL;
614 nm_spec = NULL;
615
616 if (host_word_szB == 8) {
617 const HChar* nm = NULL;
618 ULong VEX_REGPARM(1) (*fn)(HWord) = NULL;
619 switch (hWs_to_check) {
620 case 1: fn = genericg_compute_checksum_8al_1;
621 nm = "genericg_compute_checksum_8al_1"; break;
622 case 2: fn = genericg_compute_checksum_8al_2;
623 nm = "genericg_compute_checksum_8al_2"; break;
624 case 3: fn = genericg_compute_checksum_8al_3;
625 nm = "genericg_compute_checksum_8al_3"; break;
626 case 4: fn = genericg_compute_checksum_8al_4;
627 nm = "genericg_compute_checksum_8al_4"; break;
628 case 5: fn = genericg_compute_checksum_8al_5;
629 nm = "genericg_compute_checksum_8al_5"; break;
630 case 6: fn = genericg_compute_checksum_8al_6;
631 nm = "genericg_compute_checksum_8al_6"; break;
632 case 7: fn = genericg_compute_checksum_8al_7;
633 nm = "genericg_compute_checksum_8al_7"; break;
634 case 8: fn = genericg_compute_checksum_8al_8;
635 nm = "genericg_compute_checksum_8al_8"; break;
636 case 9: fn = genericg_compute_checksum_8al_9;
637 nm = "genericg_compute_checksum_8al_9"; break;
638 case 10: fn = genericg_compute_checksum_8al_10;
639 nm = "genericg_compute_checksum_8al_10"; break;
640 case 11: fn = genericg_compute_checksum_8al_11;
641 nm = "genericg_compute_checksum_8al_11"; break;
642 case 12: fn = genericg_compute_checksum_8al_12;
643 nm = "genericg_compute_checksum_8al_12"; break;
644 default: break;
645 }
646 fn_spec = (VEX_REGPARM(1) HWord(*)(HWord)) fn;
647 nm_spec = nm;
648 } else {
649 const HChar* nm = NULL;
650 UInt VEX_REGPARM(1) (*fn)(HWord) = NULL;
651 switch (hWs_to_check) {
652 case 1: fn = genericg_compute_checksum_4al_1;
653 nm = "genericg_compute_checksum_4al_1"; break;
654 case 2: fn = genericg_compute_checksum_4al_2;
655 nm = "genericg_compute_checksum_4al_2"; break;
656 case 3: fn = genericg_compute_checksum_4al_3;
657 nm = "genericg_compute_checksum_4al_3"; break;
658 case 4: fn = genericg_compute_checksum_4al_4;
659 nm = "genericg_compute_checksum_4al_4"; break;
660 case 5: fn = genericg_compute_checksum_4al_5;
661 nm = "genericg_compute_checksum_4al_5"; break;
662 case 6: fn = genericg_compute_checksum_4al_6;
663 nm = "genericg_compute_checksum_4al_6"; break;
664 case 7: fn = genericg_compute_checksum_4al_7;
665 nm = "genericg_compute_checksum_4al_7"; break;
666 case 8: fn = genericg_compute_checksum_4al_8;
667 nm = "genericg_compute_checksum_4al_8"; break;
668 case 9: fn = genericg_compute_checksum_4al_9;
669 nm = "genericg_compute_checksum_4al_9"; break;
670 case 10: fn = genericg_compute_checksum_4al_10;
671 nm = "genericg_compute_checksum_4al_10"; break;
672 case 11: fn = genericg_compute_checksum_4al_11;
673 nm = "genericg_compute_checksum_4al_11"; break;
674 case 12: fn = genericg_compute_checksum_4al_12;
675 nm = "genericg_compute_checksum_4al_12"; break;
676 default: break;
677 }
678 fn_spec = (VEX_REGPARM(1) HWord(*)(HWord))fn;
679 nm_spec = nm;
680 }
681
682 expectedhW = fn_generic( first_hW, hWs_to_check );
683 /* If we got a specialised version, check it produces the same
684 result as the generic version! */
685 if (fn_spec) {
686 vassert(nm_spec);
687 vassert(expectedhW == fn_spec( first_hW ));
688 } else {
689 vassert(!nm_spec);
690 }
691
692 /* Set CMSTART and CMLEN. These will describe to the despatcher
693 the area of guest code to invalidate should we exit with a
694 self-check failure. */
695
696 tistart_tmp = newIRTemp(irsb->tyenv, guest_word_type);
697 tilen_tmp = newIRTemp(irsb->tyenv, guest_word_type);
698
699 IRConst* base2check_IRConst
700 = guest_word_type==Ity_I32 ? IRConst_U32(toUInt(base2check))
701 : IRConst_U64(base2check);
702 IRConst* len2check_IRConst
703 = guest_word_type==Ity_I32 ? IRConst_U32(len2check)
704 : IRConst_U64(len2check);
705
706 irsb->stmts[selfcheck_idx + i * 5 + 0]
707 = IRStmt_WrTmp(tistart_tmp, IRExpr_Const(base2check_IRConst) );
708
709 irsb->stmts[selfcheck_idx + i * 5 + 1]
710 = IRStmt_WrTmp(tilen_tmp, IRExpr_Const(len2check_IRConst) );
711
712 irsb->stmts[selfcheck_idx + i * 5 + 2]
713 = IRStmt_Put( offB_GUEST_CMSTART, IRExpr_RdTmp(tistart_tmp) );
714
715 irsb->stmts[selfcheck_idx + i * 5 + 3]
716 = IRStmt_Put( offB_GUEST_CMLEN, IRExpr_RdTmp(tilen_tmp) );
717
718 /* Generate the entry point descriptors */
719 if (abiinfo_both->host_ppc_calls_use_fndescrs) {
720 HWord* descr = (HWord*)fn_generic;
721 fn_generic_entry = descr[0];
722 if (fn_spec) {
723 descr = (HWord*)fn_spec;
724 fn_spec_entry = descr[0];
725 } else {
726 fn_spec_entry = (HWord)NULL;
727 }
728 } else {
729 fn_generic_entry = (HWord)fn_generic;
730 if (fn_spec) {
731 fn_spec_entry = (HWord)fn_spec;
732 } else {
733 fn_spec_entry = (HWord)NULL;
734 }
735 }
736
737 IRExpr* callexpr = NULL;
738 if (fn_spec) {
739 callexpr = mkIRExprCCall(
740 host_word_type, 1/*regparms*/,
741 nm_spec, (void*)fn_spec_entry,
742 mkIRExprVec_1(
743 mkIRExpr_HWord( (HWord)first_hW )
744 )
745 );
746 } else {
747 callexpr = mkIRExprCCall(
748 host_word_type, 2/*regparms*/,
749 nm_generic, (void*)fn_generic_entry,
750 mkIRExprVec_2(
751 mkIRExpr_HWord( (HWord)first_hW ),
752 mkIRExpr_HWord( (HWord)hWs_to_check )
753 )
754 );
755 }
756
757 irsb->stmts[selfcheck_idx + i * 5 + 4]
758 = IRStmt_Exit(
759 IRExpr_Binop(
760 host_word_type==Ity_I64 ? Iop_CmpNE64 : Iop_CmpNE32,
761 callexpr,
762 host_word_type==Ity_I64
763 ? IRExpr_Const(IRConst_U64(expectedhW))
764 : IRExpr_Const(IRConst_U32(expectedhW))
765 ),
766 Ijk_InvalICache,
767 /* Where we must restart if there's a failure: at the
768 first extent, regardless of which extent the
769 failure actually happened in. */
770 guest_IP_bbstart_IRConst,
771 offB_GUEST_IP
772 );
773 } /* for (i = 0; i < vge->n_used; i++) */
774 }
775
776 /* irsb->next must now be set, since we've finished the block.
777 Print it if necessary.*/
778 vassert(irsb->next != NULL);
779 if (debug_print) {
780 vex_printf(" ");
781 vex_printf( "PUT(%d) = ", irsb->offsIP);
782 ppIRExpr( irsb->next );
783 vex_printf( "; exit-");
784 ppIRJumpKind(irsb->jumpkind);
785 vex_printf( "\n");
786 vex_printf( "\n");
787 }
788
789 *n_guest_instrs = n_instrs;
790 return irsb;
791 }
792
793
794 /*-------------------------------------------------------------
795 A support routine for doing self-checking translations.
796 -------------------------------------------------------------*/
797
798 /* CLEAN HELPER */
799 /* CALLED FROM GENERATED CODE */
800
/* Compute a checksum of host memory at [addr .. addr+len-1], as fast
   as possible.  All _4al versions assume that the supplied address is
   4 aligned.  All length values are in 4-byte chunks.  These fns
   are called once for every use of a self-checking translation, so
   they need to be as fast as possible. */
806
807 /* --- 32-bit versions, used only on 32-bit hosts --- */
808
/* Rotate the 32-bit value |w| left by |n| bit positions and return
   the result.

   The rotate count is reduced modulo 32, and the complementary right
   shift is masked the same way, so that n == 0 (or any multiple of
   32) returns |w| unchanged rather than evaluating a 32-bit shift by
   32, which C leaves undefined.  For counts in 1 .. 31 -- including
   the n == 31 used by the checksum routines visible in this file --
   the result is the same as the plain
   (w << n) | (w >> (32-n)) formulation. */
static inline UInt ROL32 ( UInt w, Int n ) {
   const UInt s = (UInt)n & 31;
   return (w << s) | (w >> ((32 - s) & 31));
}
813
814 VEX_REGPARM(2)
genericg_compute_checksum_4al(HWord first_w32,HWord n_w32s)815 static UInt genericg_compute_checksum_4al ( HWord first_w32, HWord n_w32s )
816 {
817 UInt sum1 = 0, sum2 = 0;
818 UInt* p = (UInt*)first_w32;
819 /* unrolled */
820 while (n_w32s >= 4) {
821 UInt w;
822 w = p[0]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
823 w = p[1]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
824 w = p[2]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
825 w = p[3]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
826 p += 4;
827 n_w32s -= 4;
828 sum1 ^= sum2;
829 }
830 while (n_w32s >= 1) {
831 UInt w;
832 w = p[0]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
833 p += 1;
834 n_w32s -= 1;
835 sum1 ^= sum2;
836 }
837 return sum1 + sum2;
838 }
839
840 /* Specialised versions of the above function */
841
842 VEX_REGPARM(1)
genericg_compute_checksum_4al_1(HWord first_w32)843 static UInt genericg_compute_checksum_4al_1 ( HWord first_w32 )
844 {
845 UInt sum1 = 0, sum2 = 0;
846 UInt* p = (UInt*)first_w32;
847 UInt w;
848 w = p[0]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
849 sum1 ^= sum2;
850 return sum1 + sum2;
851 }
852
853 VEX_REGPARM(1)
genericg_compute_checksum_4al_2(HWord first_w32)854 static UInt genericg_compute_checksum_4al_2 ( HWord first_w32 )
855 {
856 UInt sum1 = 0, sum2 = 0;
857 UInt* p = (UInt*)first_w32;
858 UInt w;
859 w = p[0]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
860 sum1 ^= sum2;
861 w = p[1]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
862 sum1 ^= sum2;
863 return sum1 + sum2;
864 }
865
866 VEX_REGPARM(1)
genericg_compute_checksum_4al_3(HWord first_w32)867 static UInt genericg_compute_checksum_4al_3 ( HWord first_w32 )
868 {
869 UInt sum1 = 0, sum2 = 0;
870 UInt* p = (UInt*)first_w32;
871 UInt w;
872 w = p[0]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
873 sum1 ^= sum2;
874 w = p[1]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
875 sum1 ^= sum2;
876 w = p[2]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
877 sum1 ^= sum2;
878 return sum1 + sum2;
879 }
880
881 VEX_REGPARM(1)
genericg_compute_checksum_4al_4(HWord first_w32)882 static UInt genericg_compute_checksum_4al_4 ( HWord first_w32 )
883 {
884 UInt sum1 = 0, sum2 = 0;
885 UInt* p = (UInt*)first_w32;
886 UInt w;
887 w = p[0]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
888 w = p[1]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
889 w = p[2]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
890 w = p[3]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
891 sum1 ^= sum2;
892 return sum1 + sum2;
893 }
894
895 VEX_REGPARM(1)
genericg_compute_checksum_4al_5(HWord first_w32)896 static UInt genericg_compute_checksum_4al_5 ( HWord first_w32 )
897 {
898 UInt sum1 = 0, sum2 = 0;
899 UInt* p = (UInt*)first_w32;
900 UInt w;
901 w = p[0]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
902 w = p[1]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
903 w = p[2]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
904 w = p[3]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
905 sum1 ^= sum2;
906 w = p[4]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
907 sum1 ^= sum2;
908 return sum1 + sum2;
909 }
910
911 VEX_REGPARM(1)
genericg_compute_checksum_4al_6(HWord first_w32)912 static UInt genericg_compute_checksum_4al_6 ( HWord first_w32 )
913 {
914 UInt sum1 = 0, sum2 = 0;
915 UInt* p = (UInt*)first_w32;
916 UInt w;
917 w = p[0]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
918 w = p[1]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
919 w = p[2]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
920 w = p[3]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
921 sum1 ^= sum2;
922 w = p[4]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
923 sum1 ^= sum2;
924 w = p[5]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
925 sum1 ^= sum2;
926 return sum1 + sum2;
927 }
928
929 VEX_REGPARM(1)
genericg_compute_checksum_4al_7(HWord first_w32)930 static UInt genericg_compute_checksum_4al_7 ( HWord first_w32 )
931 {
932 UInt sum1 = 0, sum2 = 0;
933 UInt* p = (UInt*)first_w32;
934 UInt w;
935 w = p[0]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
936 w = p[1]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
937 w = p[2]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
938 w = p[3]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
939 sum1 ^= sum2;
940 w = p[4]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
941 sum1 ^= sum2;
942 w = p[5]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
943 sum1 ^= sum2;
944 w = p[6]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
945 sum1 ^= sum2;
946 return sum1 + sum2;
947 }
948
949 VEX_REGPARM(1)
genericg_compute_checksum_4al_8(HWord first_w32)950 static UInt genericg_compute_checksum_4al_8 ( HWord first_w32 )
951 {
952 UInt sum1 = 0, sum2 = 0;
953 UInt* p = (UInt*)first_w32;
954 UInt w;
955 w = p[0]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
956 w = p[1]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
957 w = p[2]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
958 w = p[3]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
959 sum1 ^= sum2;
960 w = p[4]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
961 w = p[5]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
962 w = p[6]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
963 w = p[7]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
964 sum1 ^= sum2;
965 return sum1 + sum2;
966 }
967
968 VEX_REGPARM(1)
genericg_compute_checksum_4al_9(HWord first_w32)969 static UInt genericg_compute_checksum_4al_9 ( HWord first_w32 )
970 {
971 UInt sum1 = 0, sum2 = 0;
972 UInt* p = (UInt*)first_w32;
973 UInt w;
974 w = p[0]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
975 w = p[1]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
976 w = p[2]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
977 w = p[3]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
978 sum1 ^= sum2;
979 w = p[4]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
980 w = p[5]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
981 w = p[6]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
982 w = p[7]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
983 sum1 ^= sum2;
984 w = p[8]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
985 sum1 ^= sum2;
986 return sum1 + sum2;
987 }
988
989 VEX_REGPARM(1)
genericg_compute_checksum_4al_10(HWord first_w32)990 static UInt genericg_compute_checksum_4al_10 ( HWord first_w32 )
991 {
992 UInt sum1 = 0, sum2 = 0;
993 UInt* p = (UInt*)first_w32;
994 UInt w;
995 w = p[0]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
996 w = p[1]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
997 w = p[2]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
998 w = p[3]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
999 sum1 ^= sum2;
1000 w = p[4]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1001 w = p[5]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1002 w = p[6]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1003 w = p[7]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1004 sum1 ^= sum2;
1005 w = p[8]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1006 sum1 ^= sum2;
1007 w = p[9]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1008 sum1 ^= sum2;
1009 return sum1 + sum2;
1010 }
1011
1012 VEX_REGPARM(1)
genericg_compute_checksum_4al_11(HWord first_w32)1013 static UInt genericg_compute_checksum_4al_11 ( HWord first_w32 )
1014 {
1015 UInt sum1 = 0, sum2 = 0;
1016 UInt* p = (UInt*)first_w32;
1017 UInt w;
1018 w = p[0]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1019 w = p[1]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1020 w = p[2]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1021 w = p[3]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1022 sum1 ^= sum2;
1023 w = p[4]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1024 w = p[5]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1025 w = p[6]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1026 w = p[7]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1027 sum1 ^= sum2;
1028 w = p[8]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1029 sum1 ^= sum2;
1030 w = p[9]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1031 sum1 ^= sum2;
1032 w = p[10]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1033 sum1 ^= sum2;
1034 return sum1 + sum2;
1035 }
1036
1037 VEX_REGPARM(1)
genericg_compute_checksum_4al_12(HWord first_w32)1038 static UInt genericg_compute_checksum_4al_12 ( HWord first_w32 )
1039 {
1040 UInt sum1 = 0, sum2 = 0;
1041 UInt* p = (UInt*)first_w32;
1042 UInt w;
1043 w = p[0]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1044 w = p[1]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1045 w = p[2]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1046 w = p[3]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1047 sum1 ^= sum2;
1048 w = p[4]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1049 w = p[5]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1050 w = p[6]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1051 w = p[7]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1052 sum1 ^= sum2;
1053 w = p[8]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1054 w = p[9]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1055 w = p[10]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1056 w = p[11]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1057 sum1 ^= sum2;
1058 return sum1 + sum2;
1059 }
1060
1061
1062 /* --- 64-bit versions, used only on 64-bit hosts --- */
1063
/* Rotate the 64-bit value |w| left by |n| bit positions and return the
   result.

   The count is reduced modulo 64 and the complementary right-shift
   count is masked as well, so a shift by the full width of the type
   (undefined behaviour in C) can never happen, not even for n == 0.
   For n in 1 .. 63 the result is identical to the plain
   (w << n) | (w >> (64-n)) formulation. */
static inline ULong ROL64 ( ULong w, Int n ) {
   const UInt lsh = (UInt)n & 63;       /* effective left-shift count  */
   const UInt rsh = (64 - lsh) & 63;    /* matching right-shift count  */
   return (w << lsh) | (w >> rsh);
}
1068
1069 VEX_REGPARM(2)
genericg_compute_checksum_8al(HWord first_w64,HWord n_w64s)1070 static ULong genericg_compute_checksum_8al ( HWord first_w64, HWord n_w64s )
1071 {
1072 ULong sum1 = 0, sum2 = 0;
1073 ULong* p = (ULong*)first_w64;
1074 /* unrolled */
1075 while (n_w64s >= 4) {
1076 ULong w;
1077 w = p[0]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1078 w = p[1]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1079 w = p[2]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1080 w = p[3]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1081 p += 4;
1082 n_w64s -= 4;
1083 sum1 ^= sum2;
1084 }
1085 while (n_w64s >= 1) {
1086 ULong w;
1087 w = p[0]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1088 p += 1;
1089 n_w64s -= 1;
1090 sum1 ^= sum2;
1091 }
1092 return sum1 + sum2;
1093 }
1094
1095 /* Specialised versions of the above function */
1096
1097 VEX_REGPARM(1)
genericg_compute_checksum_8al_1(HWord first_w64)1098 static ULong genericg_compute_checksum_8al_1 ( HWord first_w64 )
1099 {
1100 ULong sum1 = 0, sum2 = 0;
1101 ULong* p = (ULong*)first_w64;
1102 ULong w;
1103 w = p[0]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1104 sum1 ^= sum2;
1105 return sum1 + sum2;
1106 }
1107
1108 VEX_REGPARM(1)
genericg_compute_checksum_8al_2(HWord first_w64)1109 static ULong genericg_compute_checksum_8al_2 ( HWord first_w64 )
1110 {
1111 ULong sum1 = 0, sum2 = 0;
1112 ULong* p = (ULong*)first_w64;
1113 ULong w;
1114 w = p[0]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1115 sum1 ^= sum2;
1116 w = p[1]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1117 sum1 ^= sum2;
1118 return sum1 + sum2;
1119 }
1120
1121 VEX_REGPARM(1)
genericg_compute_checksum_8al_3(HWord first_w64)1122 static ULong genericg_compute_checksum_8al_3 ( HWord first_w64 )
1123 {
1124 ULong sum1 = 0, sum2 = 0;
1125 ULong* p = (ULong*)first_w64;
1126 ULong w;
1127 w = p[0]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1128 sum1 ^= sum2;
1129 w = p[1]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1130 sum1 ^= sum2;
1131 w = p[2]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1132 sum1 ^= sum2;
1133 return sum1 + sum2;
1134 }
1135
1136 VEX_REGPARM(1)
genericg_compute_checksum_8al_4(HWord first_w64)1137 static ULong genericg_compute_checksum_8al_4 ( HWord first_w64 )
1138 {
1139 ULong sum1 = 0, sum2 = 0;
1140 ULong* p = (ULong*)first_w64;
1141 ULong w;
1142 w = p[0]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1143 w = p[1]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1144 w = p[2]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1145 w = p[3]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1146 sum1 ^= sum2;
1147 return sum1 + sum2;
1148 }
1149
1150 VEX_REGPARM(1)
genericg_compute_checksum_8al_5(HWord first_w64)1151 static ULong genericg_compute_checksum_8al_5 ( HWord first_w64 )
1152 {
1153 ULong sum1 = 0, sum2 = 0;
1154 ULong* p = (ULong*)first_w64;
1155 ULong w;
1156 w = p[0]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1157 w = p[1]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1158 w = p[2]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1159 w = p[3]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1160 sum1 ^= sum2;
1161 w = p[4]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1162 sum1 ^= sum2;
1163 return sum1 + sum2;
1164 }
1165
1166 VEX_REGPARM(1)
genericg_compute_checksum_8al_6(HWord first_w64)1167 static ULong genericg_compute_checksum_8al_6 ( HWord first_w64 )
1168 {
1169 ULong sum1 = 0, sum2 = 0;
1170 ULong* p = (ULong*)first_w64;
1171 ULong w;
1172 w = p[0]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1173 w = p[1]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1174 w = p[2]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1175 w = p[3]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1176 sum1 ^= sum2;
1177 w = p[4]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1178 sum1 ^= sum2;
1179 w = p[5]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1180 sum1 ^= sum2;
1181 return sum1 + sum2;
1182 }
1183
1184 VEX_REGPARM(1)
genericg_compute_checksum_8al_7(HWord first_w64)1185 static ULong genericg_compute_checksum_8al_7 ( HWord first_w64 )
1186 {
1187 ULong sum1 = 0, sum2 = 0;
1188 ULong* p = (ULong*)first_w64;
1189 ULong w;
1190 w = p[0]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1191 w = p[1]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1192 w = p[2]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1193 w = p[3]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1194 sum1 ^= sum2;
1195 w = p[4]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1196 sum1 ^= sum2;
1197 w = p[5]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1198 sum1 ^= sum2;
1199 w = p[6]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1200 sum1 ^= sum2;
1201 return sum1 + sum2;
1202 }
1203
1204 VEX_REGPARM(1)
genericg_compute_checksum_8al_8(HWord first_w64)1205 static ULong genericg_compute_checksum_8al_8 ( HWord first_w64 )
1206 {
1207 ULong sum1 = 0, sum2 = 0;
1208 ULong* p = (ULong*)first_w64;
1209 ULong w;
1210 w = p[0]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1211 w = p[1]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1212 w = p[2]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1213 w = p[3]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1214 sum1 ^= sum2;
1215 w = p[4]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1216 w = p[5]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1217 w = p[6]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1218 w = p[7]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1219 sum1 ^= sum2;
1220 return sum1 + sum2;
1221 }
1222
1223 VEX_REGPARM(1)
genericg_compute_checksum_8al_9(HWord first_w64)1224 static ULong genericg_compute_checksum_8al_9 ( HWord first_w64 )
1225 {
1226 ULong sum1 = 0, sum2 = 0;
1227 ULong* p = (ULong*)first_w64;
1228 ULong w;
1229 w = p[0]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1230 w = p[1]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1231 w = p[2]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1232 w = p[3]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1233 sum1 ^= sum2;
1234 w = p[4]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1235 w = p[5]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1236 w = p[6]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1237 w = p[7]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1238 sum1 ^= sum2;
1239 w = p[8]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1240 sum1 ^= sum2;
1241 return sum1 + sum2;
1242 }
1243
1244 VEX_REGPARM(1)
genericg_compute_checksum_8al_10(HWord first_w64)1245 static ULong genericg_compute_checksum_8al_10 ( HWord first_w64 )
1246 {
1247 ULong sum1 = 0, sum2 = 0;
1248 ULong* p = (ULong*)first_w64;
1249 ULong w;
1250 w = p[0]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1251 w = p[1]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1252 w = p[2]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1253 w = p[3]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1254 sum1 ^= sum2;
1255 w = p[4]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1256 w = p[5]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1257 w = p[6]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1258 w = p[7]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1259 sum1 ^= sum2;
1260 w = p[8]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1261 sum1 ^= sum2;
1262 w = p[9]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1263 sum1 ^= sum2;
1264 return sum1 + sum2;
1265 }
1266
1267 VEX_REGPARM(1)
genericg_compute_checksum_8al_11(HWord first_w64)1268 static ULong genericg_compute_checksum_8al_11 ( HWord first_w64 )
1269 {
1270 ULong sum1 = 0, sum2 = 0;
1271 ULong* p = (ULong*)first_w64;
1272 ULong w;
1273 w = p[0]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1274 w = p[1]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1275 w = p[2]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1276 w = p[3]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1277 sum1 ^= sum2;
1278 w = p[4]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1279 w = p[5]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1280 w = p[6]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1281 w = p[7]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1282 sum1 ^= sum2;
1283 w = p[8]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1284 sum1 ^= sum2;
1285 w = p[9]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1286 sum1 ^= sum2;
1287 w = p[10]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1288 sum1 ^= sum2;
1289 return sum1 + sum2;
1290 }
1291
1292 VEX_REGPARM(1)
genericg_compute_checksum_8al_12(HWord first_w64)1293 static ULong genericg_compute_checksum_8al_12 ( HWord first_w64 )
1294 {
1295 ULong sum1 = 0, sum2 = 0;
1296 ULong* p = (ULong*)first_w64;
1297 ULong w;
1298 w = p[0]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1299 w = p[1]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1300 w = p[2]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1301 w = p[3]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1302 sum1 ^= sum2;
1303 w = p[4]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1304 w = p[5]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1305 w = p[6]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1306 w = p[7]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1307 sum1 ^= sum2;
1308 w = p[8]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1309 w = p[9]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1310 w = p[10]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1311 w = p[11]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1312 sum1 ^= sum2;
1313 return sum1 + sum2;
1314 }
1315
1316 /*--------------------------------------------------------------------*/
1317 /*--- end guest_generic_bb_to_IR.c ---*/
1318 /*--------------------------------------------------------------------*/
1319