1 /*
2 * Copyright (C) 2019 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "src/profiling/perf/regs_parsing.h"
18
#include <inttypes.h>
#include <linux/perf_event.h>
#include <stdint.h>
#include <string.h>
#include <unistd.h>
#include <memory>
#include <type_traits>
24
25 #include <unwindstack/Elf.h>
26 #include <unwindstack/MachineArm.h>
27 #include <unwindstack/MachineArm64.h>
28 #include <unwindstack/Regs.h>
29 #include <unwindstack/RegsArm.h>
30 #include <unwindstack/RegsArm64.h>
31 #include <unwindstack/RegsX86.h>
32 #include <unwindstack/RegsX86_64.h>
33 #include <unwindstack/UserArm.h>
34 #include <unwindstack/UserArm64.h>
35 #include <unwindstack/UserX86.h>
36 #include <unwindstack/UserX86_64.h>
37
38 // kernel uapi headers
39 #include <uapi/asm-arm/asm/perf_regs.h>
40 #include <uapi/asm-x86/asm/perf_regs.h>
41 #define perf_event_arm_regs perf_event_arm64_regs
42 #include <uapi/asm-arm64/asm/perf_regs.h>
43 #undef perf_event_arm_regs
44
45 namespace perfetto {
46 namespace profiling {
47
48 namespace {
49
// Compile-time maximum of two sizes. Written as a single return expression so
// that it can be evaluated inside constant expressions.
constexpr size_t constexpr_max(size_t x, size_t y) {
  return (y < x) ? x : y;
}
53
// Copies sizeof(T) bytes starting at |ptr| into |*value_out| and returns the
// position immediately after the consumed bytes. memcpy places no alignment
// requirement on |ptr|, so the source may sit at any byte offset.
// No bounds checking is done here: the caller must guarantee that at least
// sizeof(T) bytes are readable at |ptr|.
template <typename T>
const char* ReadValue(T* value_out, const char* ptr) {
  // A byte-wise copy is only well-defined for trivially copyable types.
  static_assert(std::is_trivially_copyable<T>::value,
                "ReadValue requires a trivially copyable type");
  memcpy(value_out, ptr, sizeof(T));
  return ptr + sizeof(T);
}
59
60 // Supported configurations:
61 // * 32 bit daemon, 32 bit userspace
62 // * 64 bit daemon, mixed bitness userspace
63 // Therefore give the kernel the mask corresponding to our build architecture.
64 // Register parsing handles the mixed userspace ABI cases.
65 // For simplicity, we ask for as many registers as we can, even if not all of
66 // them will be used during unwinding.
67 // TODO(rsavitski): cleanly detect 32 bit builds being side-loaded onto a system
68 // with 64 bit userspace processes.
PerfUserRegsMask(unwindstack::ArchEnum arch)69 uint64_t PerfUserRegsMask(unwindstack::ArchEnum arch) {
70 switch (static_cast<uint8_t>(arch)) { // cast to please -Wswitch-enum
71 case unwindstack::ARCH_ARM64:
72 return (1ULL << PERF_REG_ARM64_MAX) - 1;
73 case unwindstack::ARCH_ARM:
74 return ((1ULL << PERF_REG_ARM_MAX) - 1);
75 // perf on x86_64 doesn't allow sampling ds/es/fs/gs registers. See
76 // arch/x86/kernel/perf_regs.c in the kernel.
77 case unwindstack::ARCH_X86_64:
78 return (((1ULL << PERF_REG_X86_64_MAX) - 1) & ~(1ULL << PERF_REG_X86_DS) &
79 ~(1ULL << PERF_REG_X86_ES) & ~(1ULL << PERF_REG_X86_FS) &
80 ~(1ULL << PERF_REG_X86_GS));
81 // Note: excluding these segment registers might not be necessary on x86,
82 // but they won't be used anyway (so follow x64).
83 case unwindstack::ARCH_X86:
84 return ((1ULL << PERF_REG_X86_32_MAX) - 1) & ~(1ULL << PERF_REG_X86_DS) &
85 ~(1ULL << PERF_REG_X86_ES) & ~(1ULL << PERF_REG_X86_FS) &
86 ~(1ULL << PERF_REG_X86_GS);
87 default:
88 PERFETTO_FATAL("Unsupported architecture");
89 }
90 }
91
92 // Adjusts the given architecture enum based on the ABI (as recorded in the perf
93 // sample). Note: we do not support 64 bit samples on a 32 bit daemon build, so
94 // this only converts from 64 bit to 32 bit architectures.
ArchForAbi(unwindstack::ArchEnum arch,uint64_t abi)95 unwindstack::ArchEnum ArchForAbi(unwindstack::ArchEnum arch, uint64_t abi) {
96 if (arch == unwindstack::ARCH_ARM64 && abi == PERF_SAMPLE_REGS_ABI_32) {
97 return unwindstack::ARCH_ARM;
98 }
99 if (arch == unwindstack::ARCH_X86_64 && abi == PERF_SAMPLE_REGS_ABI_32) {
100 return unwindstack::ARCH_X86;
101 }
102 return arch;
103 }
104
105 // Register values as an array, indexed using the kernel uapi perf_events.h enum
106 // values. Unsampled values will be left as zeroes.
107 struct RawRegisterData {
108 static constexpr uint64_t kMaxSize =
109 constexpr_max(PERF_REG_ARM64_MAX,
110 constexpr_max(PERF_REG_ARM_MAX, PERF_REG_X86_64_MAX));
111 uint64_t regs[kMaxSize] = {};
112 };
113
114 // First converts the |RawRegisterData| array to libunwindstack's "user"
115 // register structs (which match the ptrace/coredump format, also available at
116 // <sys/user.h>), then constructs the relevant unwindstack::Regs subclass out
117 // of the latter.
ToLibUnwindstackRegs(const RawRegisterData & raw_regs,unwindstack::ArchEnum arch)118 std::unique_ptr<unwindstack::Regs> ToLibUnwindstackRegs(
119 const RawRegisterData& raw_regs,
120 unwindstack::ArchEnum arch) {
121 if (arch == unwindstack::ARCH_ARM64) {
122 static_assert(static_cast<int>(unwindstack::ARM64_REG_R0) ==
123 static_cast<int>(PERF_REG_ARM64_X0) &&
124 static_cast<int>(unwindstack::ARM64_REG_R0) == 0,
125 "register layout mismatch");
126 static_assert(static_cast<int>(unwindstack::ARM64_REG_R30) ==
127 static_cast<int>(PERF_REG_ARM64_LR),
128 "register layout mismatch");
129 // Both the perf_event register order and the "user" format are derived from
130 // "struct pt_regs", so we can directly memcpy the first 31 regs (up to and
131 // including LR).
132 unwindstack::arm64_user_regs arm64_user_regs = {};
133 memcpy(&arm64_user_regs.regs[0], &raw_regs.regs[0],
134 sizeof(uint64_t) * (PERF_REG_ARM64_LR + 1));
135 arm64_user_regs.sp = raw_regs.regs[PERF_REG_ARM64_SP];
136 arm64_user_regs.pc = raw_regs.regs[PERF_REG_ARM64_PC];
137 return std::unique_ptr<unwindstack::Regs>(
138 unwindstack::RegsArm64::Read(&arm64_user_regs));
139 }
140
141 if (arch == unwindstack::ARCH_ARM) {
142 static_assert(static_cast<int>(unwindstack::ARM_REG_R0) ==
143 static_cast<int>(PERF_REG_ARM_R0) &&
144 static_cast<int>(unwindstack::ARM_REG_R0) == 0,
145 "register layout mismatch");
146 static_assert(static_cast<int>(unwindstack::ARM_REG_LAST) ==
147 static_cast<int>(PERF_REG_ARM_MAX),
148 "register layout mismatch");
149 // As with arm64, the layouts match, but we need to downcast to u32.
150 unwindstack::arm_user_regs arm_user_regs = {};
151 for (size_t i = 0; i < unwindstack::ARM_REG_LAST; i++) {
152 arm_user_regs.regs[i] = static_cast<uint32_t>(raw_regs.regs[i]);
153 }
154 return std::unique_ptr<unwindstack::Regs>(
155 unwindstack::RegsArm::Read(&arm_user_regs));
156 }
157
158 if (arch == unwindstack::ARCH_X86_64) {
159 // We've sampled more registers than what libunwindstack will use. Don't
160 // copy over cs/ss/flags.
161 unwindstack::x86_64_user_regs x86_64_user_regs = {};
162 x86_64_user_regs.rax = raw_regs.regs[PERF_REG_X86_AX];
163 x86_64_user_regs.rbx = raw_regs.regs[PERF_REG_X86_BX];
164 x86_64_user_regs.rcx = raw_regs.regs[PERF_REG_X86_CX];
165 x86_64_user_regs.rdx = raw_regs.regs[PERF_REG_X86_DX];
166 x86_64_user_regs.r8 = raw_regs.regs[PERF_REG_X86_R8];
167 x86_64_user_regs.r9 = raw_regs.regs[PERF_REG_X86_R9];
168 x86_64_user_regs.r10 = raw_regs.regs[PERF_REG_X86_R10];
169 x86_64_user_regs.r11 = raw_regs.regs[PERF_REG_X86_R11];
170 x86_64_user_regs.r12 = raw_regs.regs[PERF_REG_X86_R12];
171 x86_64_user_regs.r13 = raw_regs.regs[PERF_REG_X86_R13];
172 x86_64_user_regs.r14 = raw_regs.regs[PERF_REG_X86_R14];
173 x86_64_user_regs.r15 = raw_regs.regs[PERF_REG_X86_R15];
174 x86_64_user_regs.rdi = raw_regs.regs[PERF_REG_X86_DI];
175 x86_64_user_regs.rsi = raw_regs.regs[PERF_REG_X86_SI];
176 x86_64_user_regs.rbp = raw_regs.regs[PERF_REG_X86_BP];
177 x86_64_user_regs.rsp = raw_regs.regs[PERF_REG_X86_SP];
178 x86_64_user_regs.rip = raw_regs.regs[PERF_REG_X86_IP];
179 return std::unique_ptr<unwindstack::Regs>(
180 unwindstack::RegsX86_64::Read(&x86_64_user_regs));
181 }
182
183 if (arch == unwindstack::ARCH_X86) {
184 // We've sampled more registers than what libunwindstack will use. Don't
185 // copy over cs/ss/flags.
186 unwindstack::x86_user_regs x86_user_regs = {};
187 x86_user_regs.eax = static_cast<uint32_t>(raw_regs.regs[PERF_REG_X86_AX]);
188 x86_user_regs.ebx = static_cast<uint32_t>(raw_regs.regs[PERF_REG_X86_BX]);
189 x86_user_regs.ecx = static_cast<uint32_t>(raw_regs.regs[PERF_REG_X86_CX]);
190 x86_user_regs.edx = static_cast<uint32_t>(raw_regs.regs[PERF_REG_X86_DX]);
191 x86_user_regs.ebp = static_cast<uint32_t>(raw_regs.regs[PERF_REG_X86_BP]);
192 x86_user_regs.edi = static_cast<uint32_t>(raw_regs.regs[PERF_REG_X86_DI]);
193 x86_user_regs.esi = static_cast<uint32_t>(raw_regs.regs[PERF_REG_X86_SI]);
194 x86_user_regs.esp = static_cast<uint32_t>(raw_regs.regs[PERF_REG_X86_SP]);
195 x86_user_regs.eip = static_cast<uint32_t>(raw_regs.regs[PERF_REG_X86_IP]);
196 return std::unique_ptr<unwindstack::Regs>(
197 unwindstack::RegsX86::Read(&x86_user_regs));
198 }
199
200 PERFETTO_FATAL("Unsupported architecture");
201 }
202
203 } // namespace
204
PerfUserRegsMaskForArch(unwindstack::ArchEnum arch)205 uint64_t PerfUserRegsMaskForArch(unwindstack::ArchEnum arch) {
206 return PerfUserRegsMask(arch);
207 }
208
209 // Assumes that the sampling was configured with
210 // |PerfUserRegsMaskForArch(unwindstack::Regs::CurrentArch())|.
ReadPerfUserRegsData(const char ** data)211 std::unique_ptr<unwindstack::Regs> ReadPerfUserRegsData(const char** data) {
212 unwindstack::ArchEnum requested_arch = unwindstack::Regs::CurrentArch();
213
214 // Layout, assuming a sparse bitmask requesting r1 and r15:
215 // userspace thread: [u64 abi] [u64 r1] [u64 r15]
216 // kernel thread: [u64 abi]
217 const char* parse_pos = *data;
218 uint64_t sampled_abi;
219 parse_pos = ReadValue(&sampled_abi, parse_pos);
220
221 // ABI_NONE means there were no registers, as we've sampled a kernel thread,
222 // which doesn't have userspace registers.
223 if (sampled_abi == PERF_SAMPLE_REGS_ABI_NONE) {
224 *data = parse_pos; // adjust caller's parsing position
225 return nullptr;
226 }
227
228 // Unpack the densely-packed register values into |RawRegisterData|, which has
229 // a value for every register (unsampled registers will be left at zero).
230 RawRegisterData raw_regs{};
231 uint64_t regs_mask = PerfUserRegsMaskForArch(requested_arch);
232 for (size_t i = 0; regs_mask && (i < RawRegisterData::kMaxSize); i++) {
233 if (regs_mask & (1ULL << i)) {
234 parse_pos = ReadValue(&raw_regs.regs[i], parse_pos);
235 }
236 }
237
238 // Special case: we've requested arm64 registers from a 64 bit kernel, but
239 // ended up sampling a 32 bit arm userspace process. The 32 bit execution
240 // state of the target process was saved by the exception entry in an
241 // ISA-specific way. The userspace R0-R14 end up saved as arm64 W0-W14, but
242 // the program counter (R15 on arm32) is still in PERF_REG_ARM64_PC (the 33rd
243 // register). So we can take the kernel-dumped 64 bit register state, reassign
244 // the PC into the R15 slot, and treat the resulting RawRegisterData as an
245 // arm32 register bank. See "Fundamentals of ARMv8-A" (ARM DOC
246 // 100878_0100_en), page 28.
247 // x86-64 doesn't need any such fixups.
248 if (requested_arch == unwindstack::ARCH_ARM64 &&
249 sampled_abi == PERF_SAMPLE_REGS_ABI_32) {
250 raw_regs.regs[PERF_REG_ARM_PC] = raw_regs.regs[PERF_REG_ARM64_PC];
251 }
252
253 *data = parse_pos; // adjust caller's parsing position
254
255 unwindstack::ArchEnum sampled_arch = ArchForAbi(requested_arch, sampled_abi);
256 return ToLibUnwindstackRegs(raw_regs, sampled_arch);
257 }
258
259 } // namespace profiling
260 } // namespace perfetto
261