1
2/*--------------------------------------------------------------------*/
3/*--- The core dispatch loop, for jumping to a code address.       ---*/
4/*---                                       dispatch-ppc32-linux.S ---*/
5/*--------------------------------------------------------------------*/
6
7/*
8  This file is part of Valgrind, a dynamic binary instrumentation
9  framework.
10
11  Copyright (C) 2005-2015 Cerion Armour-Brown <cerion@open-works.co.uk>
12
13  This program is free software; you can redistribute it and/or
14  modify it under the terms of the GNU General Public License as
15  published by the Free Software Foundation; either version 2 of the
16  License, or (at your option) any later version.
17
18  This program is distributed in the hope that it will be useful, but
19  WITHOUT ANY WARRANTY; without even the implied warranty of
20  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21  General Public License for more details.
22
23  You should have received a copy of the GNU General Public License
24  along with this program; if not, write to the Free Software
25  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
26  02111-1307, USA.
27
28  The GNU General Public License is contained in the file COPYING.
29*/
30
31#include "pub_core_basics_asm.h"
32
33#if defined(VGP_ppc32_linux)
34
35#include "pub_core_dispatch_asm.h"
36#include "pub_core_transtab_asm.h"
37#include "libvex_guest_offsets.h"	/* for OFFSET_ppc32_CIA */
38
39
40/*------------------------------------------------------------*/
41/*---                                                      ---*/
42/*--- The dispatch loop.  VG_(disp_run_translations) is    ---*/
43/*--- used to run all translations,                        ---*/
44/*--- including no-redir ones.                             ---*/
45/*---                                                      ---*/
46/*------------------------------------------------------------*/
47
48/*----------------------------------------------------*/
49/*--- Entry and preamble (set everything up)       ---*/
50/*----------------------------------------------------*/
51
/* void VG_(disp_run_translations)( UWord* two_words,
                                    void*  guest_state,
                                    Addr   host_addr );

   ABI : SysV ppc32 (Linux).  LR is saved at 4(parent_sp).
   In  : r3 = two_words, r4 = guest_state, r5 = host_addr
   Out : does not return from here; control is transferred to
         host_addr with r31 = guest_state.  The way back to the
         caller is through the `postamble' label further down.

   Layout of the 496-byte frame built here (offsets from r1 right
   after the first stwu):
       352 .. 495   f14 .. f31       (only if machine_ppc32_has_FP)
       276 .. 351   r13 .. r31
       272          two_words (r3)
       244          VRSAVE           (only if VMX and HAS_ALTIVEC)
        48 .. 239   v20 .. v31       (only if VMX and HAS_ALTIVEC)
        44          CR
        20          scratch word used to get zero into FPSCR
         4, 0       linkage area: LR slot, back-chain
*/
.text
.globl  VG_(disp_run_translations)
.type  VG_(disp_run_translations), @function
VG_(disp_run_translations):

        /* ---- return address goes into the caller's frame ---- */
        mflr    6
        stw     6,4(1)

        /* ---- allocate our frame; 496 is a multiple of 16 ---- */
        stwu    1,-496(1)

        /* ---- FP callee-saved registers (18 * 8 = 144 bytes) ----
           r3, r4 and r5 still carry the arguments, so r6 is the
           scratch register throughout the preamble. */
        lis     6,VG_(machine_ppc32_has_FP)@ha
        lwz     6,VG_(machine_ppc32_has_FP)@l(6)
        cmplwi  6,0
        beq     LafterFP1               /* no FP unit: skip the save */

        stfd    31,488(1)
        stfd    30,480(1)
        stfd    29,472(1)
        stfd    28,464(1)
        stfd    27,456(1)
        stfd    26,448(1)
        stfd    25,440(1)
        stfd    24,432(1)
        stfd    23,424(1)
        stfd    22,416(1)
        stfd    21,408(1)
        stfd    20,400(1)
        stfd    19,392(1)
        stfd    18,384(1)
        stfd    17,376(1)
        stfd    16,368(1)
        stfd    15,360(1)
        stfd    14,352(1)
LafterFP1:

        /* ---- integer callee-saved registers (19 * 4 = 76 bytes) ---- */
        stw     31,348(1)
        stw     30,344(1)
        stw     29,340(1)
        stw     28,336(1)
        stw     27,332(1)
        stw     26,328(1)
        stw     25,324(1)
        stw     24,320(1)
        stw     23,316(1)
        stw     22,312(1)
        stw     21,308(1)
        stw     20,304(1)
        stw     19,300(1)
        stw     18,296(1)
        stw     17,292(1)
        stw     16,288(1)
        stw     15,284(1)
        stw     14,280(1)
        stw     13,276(1)
        stw     3,272(1)                /* two_words, needed again on exit */

        /* ---- VRSAVE and vector callee-saved registers ----
           Saving VRSAVE is required by the AIX / Darwin ABI.  The
           Linux kernel might not use VRSAVE for its intended
           purpose, but preserving it should be harmless. */
        lis     6,VG_(machine_ppc32_has_VMX)@ha
        lwz     6,VG_(machine_ppc32_has_VMX)@l(6)
        cmplwi  6,0
        beq     LafterVMX1              /* no AltiVec: skip the save */

#ifdef HAS_ALTIVEC
        /* VRSAVE save word (4 bytes) */
        mfspr   6,256                   /* VRSAVE is SPR 256 */
        stw     6,244(1)

        /* 240..243 is alignment padding */

        /* v20..v31, quadword aligned, 12 * 16 = 192 bytes.  stvx has
           no displacement form, so each offset goes through r6. */
        li      6,224
        stvx    31,6,1
        li      6,208
        stvx    30,6,1
        li      6,192
        stvx    29,6,1
        li      6,176
        stvx    28,6,1
        li      6,160
        stvx    27,6,1
        li      6,144
        stvx    26,6,1
        li      6,128
        stvx    25,6,1
        li      6,112
        stvx    24,6,1
        li      6,96
        stvx    23,6,1
        li      6,80
        stvx    22,6,1
        li      6,64
        stvx    21,6,1
        li      6,48
        stvx    20,6,1
#endif

LafterVMX1:

        /* ---- condition register ---- */
        mfcr    6
        stw     6,44(1)

        /* Still live at this point:
             r3 = two_words, r4 = guest_state, r5 = host_addr */

        /* ---- host FPU control word ----
           Put FPSCR into the default mode expected by VEX-generated
           code (see the comments in libvex.h). */
        lis     6,VG_(machine_ppc32_has_FP)@ha
        lwz     6,VG_(machine_ppc32_has_FP)@l(6)
        cmplwi  6,0
        beq     LafterFP2

        /* Zero is brought into f3 through memory.  "fsub 3,3,3" is
           not a reliable alternative: if f3 holds a NaN or similar
           the result is not necessarily zero. */
        li      6,0
        stw     6,20(1)
        lfs     3,20(1)
        mtfsf   0xFF,3                  /* fpscr = f3 */
LafterFP2:

        /* ---- host AltiVec control word ----
           Same idea for VSCR: set it to the default mode expected
           by VEX-generated code. */
        lis     6,VG_(machine_ppc32_has_VMX)@ha
        lwz     6,VG_(machine_ppc32_has_VMX)@l(6)
        cmplwi  6,0
        beq     LafterVMX2

#ifdef HAS_ALTIVEC
        vspltisw 3,0x0                  /* v3 = all zeroes */
        mtvscr  3
#endif

LafterVMX2:

        /* ---- a small frame for the code about to be run ---- */
        stwu    1,-16(1)

        /* ---- guest state pointer for generated code ---- */
        mr      31,4                    /* r31 = guest_state */

        /* ---- enter the code cache ----
           Chained translations keep running until, for whatever
           reason, they cannot continue.  The translation in question
           then jumps (or calls) to one of the VG_(disp_cp_...)
           continuation points below. */
        mtctr   5
        bctr
        /*NOTREACHED*/
241
242/*----------------------------------------------------*/
243/*--- Postamble and exit.                          ---*/
244/*----------------------------------------------------*/
245
postamble:
        /* Common exit path, reached by branch from the continuation
           points below.

           In:   r6  = TRC value to hand back to the caller
                 r7  = optional second word (for CHAIN_ME exits, the
                       address of the place to patch)
                 r1  = the 16-byte frame pushed by the preamble just
                       before entering the code cache
           Out:  two_words[0] = r6, two_words[1] = r7; callee-saved
                 registers, VRSAVE, CR, LR and r1 are put back to
                 the values they had on entry to
                 VG_(disp_run_translations).
           Uses: r0, r3, r4, r5, r10, r11, f3, v4-v8, cr0, cr6. */

        /* Drop the 16-byte frame straight away.  All register save
           slots were written relative to the 496-byte frame, so r1
           must point at that frame again before any of them is read
           back; otherwise every restore would be 16 bytes off. */
        addi    1,1,16

        /* We're leaving.  Check that nobody messed with
           VSCR or FPSCR in ways we don't expect. */
        /* Using r10 - value used again further on, so don't trash! */
        lis     10,VG_(machine_ppc32_has_FP)@ha
        lwz     10,VG_(machine_ppc32_has_FP)@l(10)

        /* Using r11 - value used again further on, so don't trash! */
        lis     11,VG_(machine_ppc32_has_VMX)@ha
        lwz     11,VG_(machine_ppc32_has_VMX)@l(11)

        cmplwi  10,0    /* Do we have FP ? */
        beq     LafterFP8

        /* Set fpscr back to a known state, since vex-generated code
           may have messed with fpscr[rm].  Zero is passed through
           the 20(sp) scratch word, exactly as the preamble does, so
           r1 never has to be moved for it. */
        li      5,0
        stw     5,20(1)
        lfs     3,20(1)
        mtfsf   0xFF,3   /* fpscr = f3 */
LafterFP8:

        cmplwi  11,0    /* Do we have altivec? */
        beq     LafterVMX8

#ifdef HAS_ALTIVEC
        /* The preamble zeroed VSCR.  If VSCR[NJ] now reads as 1,
           report an invariant violation. */
        /* first generate 4x 0x00010000 (the NJ bit in each word) */
        vspltisw  4,0x1                   /* 4x 0x00000001 */
        vspltisw  5,0x0                   /* zero */
        vsldoi    6,4,5,0x2               /* <<2*8 => 4x 0x00010000 */
        /* retrieve VSCR and mask wanted bits */
        mfvscr    7
        vand      7,7,6                   /* gives NJ flag */
        vspltw    7,7,0x3                 /* flags-word to all lanes */
        vcmpequw. 8,6,7                   /* CR[24] = 1 if v6 == v7 */
        bt        24,invariant_violation  /* all lanes equal => NJ is set */
#endif

LafterVMX8:
        /* otherwise we're OK */
        b       remove_frame

invariant_violation:
        /* Replace whatever result was pending with the failure code. */
        li      6,VG_TRC_INVARIANT_FAILED
        li      7,0
        /* fall through */

remove_frame:
        /* r1 points at the 496-byte frame here (see top of postamble),
           so the offsets below are the same ones the preamble used. */

        /* Restore FP regs */
        /* r10 already holds VG_(machine_ppc32_has_FP) value */
        cmplwi  10,0
        beq     LafterFP9

        /* Floating-point regs */
        lfd     31,488(1)
        lfd     30,480(1)
        lfd     29,472(1)
        lfd     28,464(1)
        lfd     27,456(1)
        lfd     26,448(1)
        lfd     25,440(1)
        lfd     24,432(1)
        lfd     23,424(1)
        lfd     22,416(1)
        lfd     21,408(1)
        lfd     20,400(1)
        lfd     19,392(1)
        lfd     18,384(1)
        lfd     17,376(1)
        lfd     16,368(1)
        lfd     15,360(1)
        lfd     14,352(1)
LafterFP9:

        /* r11 already holds VG_(machine_ppc32_has_VMX) value */
        cmplwi  11,0
        beq     LafterVMX9

        /* Restore Altivec regs */
#ifdef HAS_ALTIVEC
        /* VRSAVE: write the saved word back into the SPR (mtspr,
           not mfspr, which would only overwrite r4). */
        lwz     4,244(1)
        mtspr   256,4         /* VRSAVE reg is spr number 256 */

        /* Vector regs */
        li      4,224
        lvx     31,4,1
        li      4,208
        lvx     30,4,1
        li      4,192
        lvx     29,4,1
        li      4,176
        lvx     28,4,1
        li      4,160
        lvx     27,4,1
        li      4,144
        lvx     26,4,1
        li      4,128
        lvx     25,4,1
        li      4,112
        lvx     24,4,1
        li      4,96
        lvx     23,4,1
        li      4,80
        lvx     22,4,1
        li      4,64
        lvx     21,4,1
        li      4,48
        lvx     20,4,1
#endif
LafterVMX9:

        /* restore int regs, including importantly r3 (two_words) */
        lwz     31,348(1)
        lwz     30,344(1)
        lwz     29,340(1)
        lwz     28,336(1)
        lwz     27,332(1)
        lwz     26,328(1)
        lwz     25,324(1)
        lwz     24,320(1)
        lwz     23,316(1)
        lwz     22,312(1)
        lwz     21,308(1)
        lwz     20,304(1)
        lwz     19,300(1)
        lwz     18,296(1)
        lwz     17,292(1)
        lwz     16,288(1)
        lwz     15,284(1)
        lwz     14,280(1)
        lwz     13,276(1)
        lwz     3,272(1)
        /* Stash return values */
        stw     6,0(3)
        stw     7,4(3)

        /* restore cr from the word the preamble stored at 44(sp);
           this has to come after the last compare above */
        lwz     0,44(1)
        mtcr    0

        /* restore lr & sp, and leave */
        lwz     0,500(1)  /* stack_size + 4 */
        mtlr    0
        addi    1,1,496   /* stack_size */
        blr
398
399
400/*----------------------------------------------------*/
401/*--- Continuation points                          ---*/
402/*----------------------------------------------------*/
403
404/* ------ Chain me to slow entry point ------ */
.global VG_(disp_cp_chain_me_to_slowEP)
VG_(disp_cp_chain_me_to_slowEP):
        /* Reached by a call, so LR identifies the call site that
           wants patching.  Hand the pair (Chain_me_S, patch address)
           back to C land through the postamble:
             r6 = VG_TRC_CHAIN_ME_TO_SLOW_EP
             r7 = LR rewound to the start of the call sequence */
        mflr    7
        /* The call sequence is 16 bytes long:
             8 = imm32-fixed2 r30, disp_cp_chain_me_to_slowEP
             4 = mtctr r30
             4 = the branch-and-link through ctr
        */
        addi    7,7,-(8+4+4)
        li      6,VG_TRC_CHAIN_ME_TO_SLOW_EP
        b       postamble
419
420/* ------ Chain me to fast entry point ------ */
.global VG_(disp_cp_chain_me_to_fastEP)
VG_(disp_cp_chain_me_to_fastEP):
        /* Reached by a call, so LR identifies the call site that
           wants patching.  Hand the pair (Chain_me_F, patch address)
           back to C land through the postamble:
             r6 = VG_TRC_CHAIN_ME_TO_FAST_EP
             r7 = LR rewound to the start of the call sequence */
        mflr    7
        /* The call sequence is 16 bytes long:
             8 = imm32-fixed2 r30, disp_cp_chain_me_to_fastEP
             4 = mtctr r30
             4 = the branch-and-link through ctr
        */
        addi    7,7,-(8+4+4)
        li      6,VG_TRC_CHAIN_ME_TO_FAST_EP
        b       postamble
435
436/* ------ Indirect but boring jump ------ */
.global VG_(disp_cp_xindir)
VG_(disp_cp_xindir):
        /* In:   r31 = guest state pointer
           Uses: r3 (target guest address), r4, r5, r6, r7, ctr, cr0
           Either jumps straight to the cached host code for the
           target, or leaves through the postamble with
           VG_TRC_INNER_FASTMISS. */

        /* r3 = guest address we are heading for */
        lwz     3,OFFSET_ppc32_CIA(31)

        /* statistics: VG_(stats__n_xindirs_32) += 1 */
        lis     5,VG_(stats__n_xindirs_32)@ha
        addi    5,5,VG_(stats__n_xindirs_32)@l
        lwz     6,0(5)
        addi    6,6,1
        stw     6,0(5)

        /* r5 = &VG_(tt_fast)[0] */
        lis     5,VG_(tt_fast)@ha
        addi    5,5,VG_(tt_fast)@l

        /* Fast lookup in the translation cache.
           r4 = VG_TT_FAST_HASH(addr)          * sizeof(FastCacheEntry)
              = ((r3 >>u 2) & VG_TT_FAST_MASK) << 3
           done as one rotate-left-by-1 plus mask. */
        rlwinm  4,3,1, 29-VG_TT_FAST_BITS, 28   /* entry# * 8 */
        add     5,5,4                           /* r5 = &VG_(tt_fast)[entry#] */
        lwz     6,0(5)                          /* r6 = entry .guest */
        lwz     7,4(5)                          /* r7 = entry .host  */
        cmpw    3,6
        bne     fast_lookup_failed

        /* Hit: continue at the .host address. */
        mtctr   7
        bctr

fast_lookup_failed:
        /* statistics: VG_(stats__n_xindir_misses_32) += 1 */
        lis     5,VG_(stats__n_xindir_misses_32)@ha
        addi    5,5,VG_(stats__n_xindir_misses_32)@l
        lwz     6,0(5)
        addi    6,6,1
        stw     6,0(5)

        /* Miss: report it to the caller, no second word. */
        li      6,VG_TRC_INNER_FASTMISS
        li      7,0
        b       postamble
        /*NOTREACHED*/
479
480/* ------ Assisted jump ------ */
.global VG_(disp_cp_xassisted)
VG_(disp_cp_xassisted):
        /* On arrival r31 carries the TRC.  Return it as the first
           word, with zero as the second. */
        li      7,0
        mr      6,31
        b       postamble
487
488/* ------ Event check failed ------ */
.global VG_(disp_cp_evcheck_fail)
VG_(disp_cp_evcheck_fail):
        /* Return (VG_TRC_INNER_COUNTERZERO, 0) to the caller. */
        li      7,0
        li      6,VG_TRC_INNER_COUNTERZERO
        b       postamble


/* Everything from the entry label down to here counts as one function. */
.size VG_(disp_run_translations), .-VG_(disp_run_translations)
497
498#endif // defined(VGP_ppc32_linux)
499
500/* Let the linker know we don't need an executable stack */
501MARK_STACK_NO_EXEC
502
503/*--------------------------------------------------------------------*/
504/*--- end                                                          ---*/
505/*--------------------------------------------------------------------*/
506