1 /*
2  * Copyright (C) 2023 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "berberis/runtime_primitives/runtime_library.h"
18 
19 #include "berberis/base/checks.h"
20 #include "berberis/base/config.h"
21 #include "berberis/guest_state/guest_state.h"
22 
// Perform all the steps needed to exit generated code except return, which is
// up to the users of this macro. The users of this macro may choose to perform
// a sibling call as necessary.
//
// Steps, in order:
//   1. Store %rax into ThreadState::cpu.insn_addr (addressed through %rbp).
//   2. Store kOutsideGeneratedCode into ThreadState::residence.
//   3. Copy the state pointer from %rbp to %rdi. This must happen before the
//      epilogue, because the epilogue overwrites %rbp with the saved value.
//   4. Release the kFrameSizeAtTranslatedCode-byte frame and pop the
//      callee-saved registers in the reverse of the order in which
//      berberis_RunGeneratedCode pushed them.
//   5. Execute EXIT_INSN.
//
// EXIT_INSN is a string literal holding the final instruction, e.g. "ret" or
// "jmp <function>". With "jmp", the target receives the state pointer in %rdi.
//
// NOTE(review): every use in this file is the only statement of a
// [[gnu::naked]] function; the macro presumably should not be used anywhere
// else, since it unwinds a frame that it did not set up - confirm.
// clang-format off
#define END_GENERATED_CODE(EXIT_INSN)                                   \
  asm(                                                                  \
      /* Sync insn_addr. */                                             \
      "mov %%rax, %[InsnAddr](%%rbp)\n"                                 \
      /* Set kOutsideGeneratedCode residence. */                        \
      "movb %[OutsideGeneratedCode], %[Residence](%%rbp)\n"             \
                                                                        \
      /* Set %rdi to the pointer to the guest state so that             \
       * we can perform a sibling call to functions like                \
       * berberis_HandleNotTranslated.                                  \
       */                                                               \
      "mov %%rbp, %%rdi\n"                                              \
                                                                        \
      /* Restore stack */                                               \
      "add %[FrameSizeAtTranslatedCode], %%rsp\n"                       \
                                                                        \
      /* Epilogue */                                                    \
      "pop %%r15\n"                                                     \
      "pop %%r14\n"                                                     \
      "pop %%r13\n"                                                     \
      "pop %%r12\n"                                                     \
      "pop %%rbx\n"                                                     \
      "pop %%rbp\n"                                                     \
      EXIT_INSN                                                         \
      ::[InsnAddr] "p"(offsetof(berberis::ThreadState, cpu.insn_addr)), \
      [Residence] "p"(offsetof(berberis::ThreadState, residence)),      \
      [OutsideGeneratedCode] "J"(berberis::kOutsideGeneratedCode),      \
      [FrameSizeAtTranslatedCode] "J"(berberis::config::kFrameSizeAtTranslatedCode))
// clang-format on
56 
57 namespace berberis {
58 
59 // "Calling conventions" among generated code and trampolines
60 // ==========================================================
61 //
62 // Introduction
63 // ------------
64 //
65 // To ensure the high performance of our generated code, we employ a couple of
66 // techniques:
67 //
68 // - We allow generated regions to jump among them without transferring control
69 //   back to Berberis runtime.
70 //
71 // - We use custom "calling conventions" that are different from the standard
72 //   x86_64 calling conventions, with some items passed in registers.
73 //
74 // Entry and exits
75 // ---------------
76 //
77 // Upon entry into generated code and trampoline adapters, we must have:
78 //
// - %rbp pointing to ThreadState (whose cpu member holds the CPUState),
80 //
81 // - every field in CPUState up to date, except insn_addr, and
82 //
83 // - %rax containing up-to-date value for potentially stale CPUState::insn_addr.
84 //
85 // Since we jump among generated code and trampolines, each region must adhere
86 // to the "calling conventions" above as it exits.
87 //
88 // Each region is allowed to use the stack pointed to by %rsp. However, it must
89 // restore %rsp before exiting.
90 //
91 // %rbx, %rbp, and %r12-%r15 are callee saved, all other registers are
92 // "caller saved". That is, regions are allowed to use them without restoring
93 // their original values.
94 //
95 // Berberis -> generated code
96 // ---------------------------------
97 //
98 // If we are transferring control to generated code and trampolines from the
99 // Berberis runtime, such as ExecuteGuest, then we must do so via
100 // berberis_RunGeneratedCode, which is responsible for setting up registers for
101 // the "calling conventions".
102 //
103 // Generated code -> Berberis
104 // ---------------------------------
105 //
// When we are exiting generated code, we must do so via the END_GENERATED_CODE
// macro defined in this file. The macro ensures that CPUState is fully up to
// date, including insn_addr, before transferring control back to the Berberis
// runtime.
110 
111 extern "C" {
112 
berberis_RunGeneratedCode(ThreadState * state,HostCode code)113 [[gnu::naked]] [[gnu::noinline]] void berberis_RunGeneratedCode(ThreadState* state, HostCode code) {
114   // Parameters are in %rdi - state and %rsi - code
115   //
116   // On x86_64 Linux, stack should be aligned on 16 at every call insn.
117   // That means stack is 8 mod 16 on function entry.
118   // See https://software.intel.com/sites/default/files/article/402129/mpx-linux64-abi.pdf (3.2.2)
119   //
120   // Stack:
121   //  0:               <- stack after prologue, aligned for next call
122   //  8: saved r15     <- stack after prologue
123   // 16: saved r14
124   // 24: saved r13
125   // 32: saved r12
126   // 40: saved rbx
127   // 48: saved rbp
128   // 56: return addr
129   // 00: <- stack at call insn - aligned on 16
130 
131   // clang-format off
132   asm(
133     // Prologue
134     "push %%rbp\n"
135     "push %%rbx\n"
136     "push %%r12\n"
137     "push %%r13\n"
138     "push %%r14\n"
139     "push %%r15\n"
140 
141     // Align stack for next call
142     "sub %[FrameSizeAtTranslatedCode], %%rsp\n"  // kStackAlignAtCall, kFrameSizeAtTranslatedCode
143 
144     // Set state pointer
145     "mov %%rdi, %%rbp\n"  // kStateRegister, kOmitFramePointer
146 
147     // Set insn_addr.
148     "mov %[InsnAddr](%%rbp), %%rax\n"
149     // Set kInsideGeneratedCode residence.
150     "movb %[InsideGeneratedCode], %[Residence](%%rbp)\n"
151 
152     // Jump to entry
153     "jmp *%%rsi"
154     ::[InsnAddr] "p"(offsetof(ThreadState, cpu.insn_addr)),
155     [Residence] "p"(offsetof(ThreadState, residence)),
156     [InsideGeneratedCode] "J"(kInsideGeneratedCode),
157     [FrameSizeAtTranslatedCode] "J"(config::kFrameSizeAtTranslatedCode));
158   // clang-format on
159 }
160 
berberis_entry_Interpret()161 extern "C" [[gnu::naked]] [[gnu::noinline]] void berberis_entry_Interpret() {
162   // clang-format off
163   asm(
164     // Sync insn_addr.
165     "mov %%rax, %[InsnAddr](%%rbp)\n"
166     // Set kOutsideGeneratedCode residence.
167     "movb %[OutsideGeneratedCode], %[Residence](%%rbp)\n"
168 
169     // rbp holds the pointer to state which is the argument to the call.
170     "mov %%rbp, %%rdi\n"
171     "call berberis_HandleInterpret\n"
172 
173     // rdi may be clobbered by the call above, so init it again.
174     "mov %%rbp, %%rdi\n"
175     "call berberis_GetDispatchAddress\n"
176     "mov %%rax, %%rcx\n"
177 
178     // Set insn_addr.
179     "mov %[InsnAddr](%%rbp), %%rax\n"
180     // Set kInsideGeneratedCode residence.
181     "movb %[InsideGeneratedCode], %[Residence](%%rbp)\n"
182 
183     "jmp *%%rcx\n"
184     ::[InsnAddr] "p"(offsetof(berberis::ThreadState, cpu.insn_addr)),
185     [Residence] "p"(offsetof(berberis::ThreadState, residence)),
186     [OutsideGeneratedCode] "J"(berberis::kOutsideGeneratedCode),
187     [InsideGeneratedCode] "J"(berberis::kInsideGeneratedCode),
188     [FrameSizeAtTranslatedCode] "J"(berberis::config::kFrameSizeAtTranslatedCode));
189   // clang-format on
190 }
191 
berberis_entry_ExitGeneratedCode()192 [[gnu::naked]] [[gnu::noinline]] void berberis_entry_ExitGeneratedCode() {
193   END_GENERATED_CODE("ret");
194 }
195 
berberis_entry_Stop()196 [[gnu::naked]] [[gnu::noinline]] void berberis_entry_Stop() {
197   END_GENERATED_CODE("ret");
198 }
199 
berberis_entry_NoExec()200 [[gnu::naked]] [[gnu::noinline]] void berberis_entry_NoExec() {
201   END_GENERATED_CODE("jmp berberis_HandleNoExec");
202   // void berberis_HandleNoExec(ThreadState*);
203   // Perform a sibling call to berberis_HandleNoExec. The only parameter
204   // is state which is saved in %rdi by END_GENERATED_CODE. We could call the
205   // function here instead of jumping to it, but it would be more work to do
206   // so because we would have to align the stack and issue the "ret"
207   // instruction after the call.
208   // TODO(b/232598137): Remove state from HandleNoExec parameters. Get it from
209   // the guest thread instead.
210 }
211 
berberis_entry_NotTranslated()212 [[gnu::naked]] [[gnu::noinline]] void berberis_entry_NotTranslated() {
213   END_GENERATED_CODE("jmp berberis_HandleNotTranslated");
214   // void berberis_HandleNotTranslated(ThreadState*);
215   // See the comment above about the sibling call.
216 }
217 
berberis_entry_Translating()218 [[gnu::naked]] [[gnu::noinline]] void berberis_entry_Translating() {
219   // TODO(b/232598137): Run interpreter while translation is in progress.
220   END_GENERATED_CODE("ret");
221 }
222 
berberis_entry_Invalidating()223 [[gnu::naked]] [[gnu::noinline]] void berberis_entry_Invalidating() {
224   // TODO(b/232598137): maybe call sched_yield() here.
225   END_GENERATED_CODE("ret");
226 }
227 
berberis_entry_Wrapping()228 [[gnu::naked]] [[gnu::noinline]] void berberis_entry_Wrapping() {
229   // TODO(b/232598137): maybe call sched_yield() here.
230   END_GENERATED_CODE("ret");
231 }
232 
berberis_entry_HandleLightCounterThresholdReached()233 [[gnu::naked]] [[gnu::noinline]] void berberis_entry_HandleLightCounterThresholdReached() {
234   // void berberis_HandleLightCounterThresholdReached(ProcessState*);
235   // Perform a sibling call to berberis_HandleLightCounterThresholdReached. The
236   // only parameter is state which is saved in %rdi by END_GENERATED_CODE. We
237   // could call the function here instead of jumping to it, but it would be more
238   // work to do so because we would have to align the stack and issue the "ret"
239   // instruction after the call.
240   // TODO(b/232598137): Remove state from HandleLightCounterThresholdReached
241   // parameters. Get it from the guest thread instead.
242   END_GENERATED_CODE("jmp berberis_HandleLightCounterThresholdReached");
243 }
244 
245 }  // extern "C"
246 
247 }  // namespace berberis
248