1
2/*--------------------------------------------------------------------*/
3/*--- begin                                dispatch-tilegx-linux.S ---*/
4/*--------------------------------------------------------------------*/
5
6/*
7  This file is part of Valgrind, a dynamic binary instrumentation
8  framework.
9
10  Copyright (C) 2010-2015  Tilera Corp.
11
12  This program is free software; you can redistribute it and/or
13  modify it under the terms of the GNU General Public License as
14  published by the Free Software Foundation; either version 2 of the
15  License, or (at your option) any later version.
16
17  This program is distributed in the hope that it will be useful, but
18  WITHOUT ANY WARRANTY; without even the implied warranty of
19  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20  General Public License for more details.
21
22  You should have received a copy of the GNU General Public License
23  along with this program; if not, write to the Free Software
24  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
25  02111-1307, USA.
26
27  The GNU General Public License is contained in the file COPYING.
28*/
29
30/* Contributed by Zhi-Gang Liu <zliu at tilera dot com> */
31
32#include "pub_core_basics_asm.h"
33
34#if defined(VGP_tilegx_linux)
35#include "pub_core_dispatch_asm.h"
36#include "pub_core_transtab_asm.h"
37#include "libvex_guest_offsets.h"       /* for OFFSET_tilegx_pc */
38
39        /*------------------------------------------------------------*/
40        /*---                                                      ---*/
41        /*--- The dispatch loop.  VG_(run_innerloop) is used to    ---*/
42        /*--- run all translations except no-redir ones.           ---*/
43        /*---                                                      ---*/
44        /*------------------------------------------------------------*/
45
46        /*----------------------------------------------------*/
47        /*--- Preamble (set everything up)                 ---*/
48        /*----------------------------------------------------*/
49
50        /* signature:
51        void VG_(disp_run_translations)( UWord* two_words,
52                                         void*  guest_state,
53                                         Addr   host_addr );
54        (stale, not defined in this file: UWord VG_(run_innerloop) ( void* guest_state, UWord do_profiling );)
55        */
56
57        .text
58        .globl  VG_(disp_run_translations)
59        VG_(disp_run_translations):
60        /* Save state in a new stack frame, then jump to host_addr. */
61        /* r0 holds two_words
62           r1 holds guest_state
63           r2 holds host_addr */
64
65        /* New stack frame: 256 bytes.  r29 is the store cursor, starting at sp+8. */
66        addli sp, sp, -256
67        addi  r29, sp, 8
68        /* Frame layout (byte offsets from the new sp); nothing here writes above 216.
69        high memory of stack
70        216  lr
71        208  r53
72        200  r52
73        192  r51
74        ...
75        48   r33
76        40   r32
77        32   r31
78        24   r30
79        16   r1 <--- saved, but postamble steps over this slot when restoring
80        8    r0
81        0       <-sp
82        */
83        st_add r29, r0, 8        /* two_words   -> sp+8,  then cursor += 8 */
84        st_add r29, r1, 8        /* guest_state -> sp+16, then cursor += 8 */
85
86        /* ... and r30 - r53, one 8-byte slot each, at sp+24 .. sp+208 */
87        st_add  r29, r30, 8
88        st_add  r29, r31, 8
89        st_add  r29, r32, 8
90        st_add  r29, r33, 8
91        st_add  r29, r34, 8
92        st_add  r29, r35, 8
93        st_add  r29, r36, 8
94        st_add  r29, r37, 8
95        st_add  r29, r38, 8
96        st_add  r29, r39, 8
97        st_add  r29, r40, 8
98        st_add  r29, r41, 8
99        st_add  r29, r42, 8
100        st_add  r29, r43, 8
101        st_add  r29, r44, 8
102        st_add  r29, r45, 8
103        st_add  r29, r46, 8
104        st_add  r29, r47, 8
105        st_add  r29, r48, 8
106        st_add  r29, r49, 8
107        st_add  r29, r50, 8
108        st_add  r29, r51, 8
109        st_add  r29, r52, 8
110        st_add  r29, r53, 8
111        st      r29, lr          /* lr -> sp+216; last slot, cursor not advanced */
112
113        /* Load the address of guest state into guest state register r50. */
114        move r50, r1
115
116        //j postamble
117
118        /* jump to the code cache (host_addr, still in r2). */
119        jr  r2
120        /*NOTREACHED*/
121
122
123       /*----------------------------------------------------*/
124       /*--- Postamble and exit.                          ---*/
125       /*----------------------------------------------------*/
126
127postamble:
128        /* At this point, r12 and r13 contain two
129        words to be returned to the caller.  r12
130        holds a TRC value, and r13 optionally may
131        hold another word (for CHAIN_ME exits, the
132        address of the place to patch.) */
133
134        /* NOTE(review): older comment text, kept for reference; it does
135        not match this code (the TRC is in r12 here, not r50):
136        "run_innerloop_exit_REALLY: r50 holds VG_TRC_* value to return.
137        Return to parent stack: addli  sp, sp, 256" (done at the end below). */
138        /* r29 = restore cursor, starting at the saved-r0 slot (sp+8). */
139        addi r29, sp, 8
140
141        /* Restore r0 from stack; holding address of two words.  The +16 steps over the r1 slot. */
142        ld_add  r0, r29, 16
143        /* store r12 in two_words[0] */
144        st_add  r0, r12, 8
145        /* store r13 in two_words[1] */
146        st  r0, r13
147
148        /* Restore callee-saved registers... (r30 - r53 from sp+24 .. sp+208) */
149        ld_add  r30, r29, 8
150        ld_add  r31, r29, 8
151        ld_add  r32, r29, 8
152        ld_add  r33, r29, 8
153        ld_add  r34, r29, 8
154        ld_add  r35, r29, 8
155        ld_add  r36, r29, 8
156        ld_add  r37, r29, 8
157        ld_add  r38, r29, 8
158        ld_add  r39, r29, 8
159        ld_add  r40, r29, 8
160        ld_add  r41, r29, 8
161        ld_add  r42, r29, 8
162        ld_add  r43, r29, 8
163        ld_add  r44, r29, 8
164        ld_add  r45, r29, 8
165        ld_add  r46, r29, 8
166        ld_add  r47, r29, 8
167        ld_add  r48, r29, 8
168        ld_add  r49, r29, 8
169        ld_add  r50, r29, 8
170        ld_add  r51, r29, 8
171        ld_add  r52, r29, 8
172        ld_add  r53, r29, 8
173        ld      lr, r29       /* return address from sp+216 */
174        addli   sp, sp, 256   /* stack_size: undo the 256-byte frame */
175        jr      lr            /* back to the caller of VG_(disp_run_translations) */
176        nop
177
178
179       /*----------------------------------------------------*/
180       /*---           Continuation points                ---*/
181       /*----------------------------------------------------*/
182
183       /* ------ Chain me to slow entry point ------ */
184       .global VG_(disp_cp_chain_me_to_slowEP)
185       VG_(disp_cp_chain_me_to_slowEP):
186        /* We were called; lr identifies the call site that wants to be
187        patched.  Hand the pair (VG_TRC_CHAIN_ME_TO_SLOW_EP, address to
188        patch) back to C land through postamble: r12 carries the TRC,
189        r13 the address. */
190        # if (VG_TRC_CHAIN_ME_TO_SLOW_EP > 128)
191        # error ("VG_TRC_CHAIN_ME_TO_SLOW_EP is > 128");
192        # endif
193        /* r13 = lr - 40: 32 bytes of mkLoadImm_EXACTLY4 + 8 bytes of
194        jalr r9.  NOTE(review): the call sequence also has an 8-byte
195        nop, which would make 48; presumably it sits after the return
196        address and is therefore not subtracted -- confirm with the emitter. */
197        addi   r13, lr, -40
198        moveli r12, VG_TRC_CHAIN_ME_TO_SLOW_EP
199        j      postamble
200
201        /* ------ Chain me to fast entry point ------ */
202        .global VG_(disp_cp_chain_me_to_fastEP)
203        VG_(disp_cp_chain_me_to_fastEP):
204        /* We were called; lr identifies the call site that wants to be
205        patched.  Hand the pair (VG_TRC_CHAIN_ME_TO_FAST_EP, address to
206        patch) back to C land through postamble: r12 carries the TRC,
207        r13 the address. */
208        # if (VG_TRC_CHAIN_ME_TO_FAST_EP > 128)
209        # error ("VG_TRC_CHAIN_ME_TO_FAST_EP is > 128");
210        # endif
211        /* r13 = lr - 40: 32 bytes of mkLoadImm_EXACTLY4 + 8 bytes of
212        jalr r9.  NOTE(review): the call sequence also has an 8-byte
213        nop, which would make 48; presumably it sits after the return
214        address and is therefore not subtracted -- confirm with the emitter. */
215        addi   r13, lr, -40
216        moveli r12, VG_TRC_CHAIN_ME_TO_FAST_EP
217        j      postamble
218
219        /* ------ Indirect but boring jump ------ */
220        .global VG_(disp_cp_xindir)
221        VG_(disp_cp_xindir):
222        /* Where are we going?  r11 = value at OFFSET_tilegx_pc in the guest state (base r50). */
223        addli    r11, r50, OFFSET_tilegx_pc
224        ld       r11, r11
225        /* Bump the 32-bit counter VG_(stats__n_xindirs_32); r6/r7 are scratch. */
226        moveli      r7, hw2_last(VG_(stats__n_xindirs_32))
227        shl16insli  r7, r7, hw1(VG_(stats__n_xindirs_32))
228        shl16insli  r7, r7, hw0(VG_(stats__n_xindirs_32))
229        ld4u   r6, r7
230        addi   r6, r6, 1
231        st4    r7, r6
232
233        /* try a fast lookup in the translation cache */
234        /* r14 = byte offset of the hash entry inside VG_(tt_fast)
235        = ((r11 >> 3) & VG_TT_FAST_MASK) << 4, i.e. hash index * 16 */
236
237        move    r14, r11
238        /* Assume VG_TT_FAST_MASK < 4G: only its hw1 and hw0 halves are loaded into r12 */
239        moveli  r12, hw1(VG_TT_FAST_MASK)
240        shl16insli r12, r12, hw0(VG_TT_FAST_MASK)
241        shrui   r14, r14, 3
242        and     r14, r14, r12
243        shli    r14, r14, 4
244        /* Note, each tt_fast hash entry has two pointers i.e. 16 Bytes. */
245
246        /* r13 = (addr of VG_(tt_fast)) + r14 */
247        moveli  r13, hw2_last(VG_(tt_fast))
248        shl16insli   r13, r13, hw1(VG_(tt_fast))
249        shl16insli   r13, r13, hw0(VG_(tt_fast))
250
251        add     r13, r13, r14
252
253        /* r12 = first word of the entry (compared with r11 below); r13 then points at the second word */
254        ld_add  r12, r13, 8
255        /* One bundle: r25 = second word of the entry (the jump target); r7 = r12 - r11, zero on a hit */
256        {
257        ld      r25, r13
258        sub     r7, r12, r11
259        }
260
261        bnez     r7, fast_lookup_failed
262
263        /* Run the translation */
264        jr      r25
265        /* NOTE(review): 8 zero bytes emitted after the jump; their purpose is not evident from this file. */
266        .quad   0x0
267
268fast_lookup_failed:
269        /* The entry's first word did not match r11.  This path does not
270        write the guest state.  Return pair is (VG_TRC_INNER_FASTMISS, 0). */
271        moveli  r12, VG_TRC_INNER_FASTMISS
272        move    r13, zero
273        /* Bump the 32-bit counter VG_(stats__n_xindir_misses_32). */
274        moveli      r7, hw2_last(VG_(stats__n_xindir_misses_32))
275        shl16insli  r7, r7, hw1(VG_(stats__n_xindir_misses_32))
276        shl16insli  r7, r7, hw0(VG_(stats__n_xindir_misses_32))
277        ld4u  r6, r7
278        addi  r6, r6, 1
279        st4   r7, r6
280        /* r6/r7 were scratch; r12/r13 are what postamble hands back. */
281        j       postamble
282
283        /* ------ Assisted jump ------ */
284        .global VG_(disp_cp_xassisted)
285        VG_(disp_cp_xassisted):
286        /* On arrival the guest-state-pointer register (r50) carries the
287        TRC.  Return the pair (r50, 0) through postamble. */
288        move    r13, zero
289        move    r12, r50
290        j       postamble
291
292        /* ------ Event check failed: return (VG_TRC_INNER_COUNTERZERO, 0) ------ */
293        .global VG_(disp_cp_evcheck_fail)
294        VG_(disp_cp_evcheck_fail):
295        move    r13, zero                          /* second word: unused, 0 */
296        moveli  r12, VG_TRC_INNER_COUNTERZERO      /* first word: the TRC */
297        j       postamble
298        /* Size of VG_(disp_run_translations): from its label up to this point. */
299        .size VG_(disp_run_translations), .-VG_(disp_run_translations)
300
301#endif /* defined(VGP_tilegx_linux) */
302
303/* Let the linker know we don't need an executable stack */
304MARK_STACK_NO_EXEC
305
306/*--------------------------------------------------------------------*/
307/*--- end                                                          ---*/
308/*--------------------------------------------------------------------*/
309
310