1 // Copyright 2018 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "base/trace_event/cfi_backtrace_android.h"
6 
7 #include <sys/mman.h>
8 #include <sys/types.h>
9 
10 #include "base/android/apk_assets.h"
11 
12 #if !defined(ARCH_CPU_ARMEL)
13 #error This file should not be built for this architecture.
14 #endif
15 
16 /*
17 Basics of unwinding:
18 For each instruction in a function we need to know what is the offset of SP
19 (Stack Pointer) to reach the previous function's stack frame. To know which
20 function is being invoked, we need the return address of the next function. The
21 CFI information for an instruction is made up of 2 offsets, CFA (Call Frame
22 Address) offset and RA (Return Address) offset. The CFA offset is the change in
23 SP made by the function till the current instruction. This depends on amount of
24 memory allocated on stack by the function plus some registers that the function
25 stores that needs to be restored at the end of function. So, at each instruction
26 the CFA offset tells the offset from original SP before the function call. The
27 RA offset tells us the offset from the previous SP into the current function
28 where the return address is stored.
29 
30 The unwind table file has 2 tables UNW_INDEX and UNW_DATA, inspired from ARM
31 EHABI format. The first table contains function addresses and an index into the
32 UNW_DATA table. The second table contains one or more rows for the function
33 unwind information.
34 
35 UNW_INDEX contains two columns of N rows each, where N is the number of
36 functions.
37   1. First column 4 byte rows of all the function start address as offset from
38      start of the binary, in sorted order.
39   2. For each function addr, the second column contains 2 byte indices in order.
40      The indices are offsets (in count of 2 bytes) of the CFI data from start of
41      UNW_DATA.
42 The last entry in the table always contains CANT_UNWIND index to specify the
43 end address of the last function.
44 
45 UNW_DATA contains data of all the functions. Each function data contains N rows.
46 The data found at the address pointed from UNW_INDEX will be:
47   2 bytes: N - number of rows that belong to current function.
48   N * 4 bytes: N rows of data. 16 bits : Address offset from function start.
49                                14 bits : CFA offset / 4.
50                                 2 bits : RA offset / 4.
51 If the RA offset of a row is 0, then use the offset of the previous rows in the
52 same function.
53 TODO(ssid): Make sure RA offset is always present.
54 
55 See extract_unwind_tables.py for details about how this data is extracted from
56 breakpad symbol files.
57 */
58 
59 extern "C" {
60 extern char __executable_start;
61 extern char _etext;
62 }
63 
64 namespace base {
65 namespace trace_event {
66 
67 namespace {
68 
69 // The value of index when the function does not have unwind information.
70 constexpr uint32_t kCantUnwind = 0xFFFF;
71 
72 // The mask on the CFI row data that is used to get the high 14 bits and
73 // multiply it by 4 to get CFA offset. Since the last 2 bits are masked out, a
74 // shift is not necessary.
75 constexpr uint16_t kCFAMask = 0xfffc;
76 
77 // The mask on the CFI row data that is used to get the low 2 bits and multiply
78 // it by 4 to get the RA offset.
79 constexpr uint16_t kRAMask = 0x3;
80 constexpr uint16_t kRAShift = 2;
81 
82 // The code in this file assumes we are running in 32-bit builds since all the
83 // addresses in the unwind table are specified in 32 bits.
84 static_assert(sizeof(uintptr_t) == 4,
85               "The unwind table format is only valid for 32 bit builds.");
86 
87 // The CFI data in UNW_DATA table starts with number of rows (N) and then
88 // followed by N rows of 4 bytes long. The CFIUnwindDataRow represents a single
89 // row of CFI data of a function in the table. Since we cast the memory at the
90 // address after the address of number of rows, into an array of
91 // CFIUnwindDataRow, the size of the struct should be 4 bytes and the order of
92 // the members is fixed according to the given format. The first 2 bytes tell
93 // the address of function and last 2 bytes give the CFI data for the offset.
94 struct CFIUnwindDataRow {
95   // The address of the instruction in terms of offset from the start of the
96   // function.
97   uint16_t addr_offset;
98   // Represents the CFA and RA offsets to get information about next stack
99   // frame. This is the CFI data at the point before executing the instruction
100   // at |addr_offset| from the start of the function.
101   uint16_t cfi_data;
102 
103   // Return the RA offset for the current unwind row.
ra_offsetbase::trace_event::__anon44ba69c30111::CFIUnwindDataRow104   size_t ra_offset() const { return (cfi_data & kRAMask) << kRAShift; }
105 
106   // Returns the CFA offset for the current unwind row.
cfa_offsetbase::trace_event::__anon44ba69c30111::CFIUnwindDataRow107   size_t cfa_offset() const { return cfi_data & kCFAMask; }
108 };
109 
110 static_assert(
111     sizeof(CFIUnwindDataRow) == 4,
112     "The CFIUnwindDataRow struct must be exactly 4 bytes for searching.");
113 
114 }  // namespace
115 
116 // static
GetInitializedInstance()117 CFIBacktraceAndroid* CFIBacktraceAndroid::GetInitializedInstance() {
118   static CFIBacktraceAndroid* instance = new CFIBacktraceAndroid();
119   return instance;
120 }
121 
CFIBacktraceAndroid()122 CFIBacktraceAndroid::CFIBacktraceAndroid()
123     : thread_local_cfi_cache_(
124           [](void* ptr) { delete static_cast<CFICache*>(ptr); }) {
125   Initialize();
126 }
127 
~CFIBacktraceAndroid()128 CFIBacktraceAndroid::~CFIBacktraceAndroid() {}
129 
Initialize()130 void CFIBacktraceAndroid::Initialize() {
131   // The address |_etext| gives the end of the .text section in the binary. This
132   // value is more accurate than parsing the memory map since the mapped
133   // regions are usualy larger than the .text section.
134   executable_end_addr_ = reinterpret_cast<uintptr_t>(&_etext);
135   // The address of |__executable_start| gives the start address of the
136   // executable. This value is used to find the offset address of the
137   // instruction in binary from PC.
138   executable_start_addr_ = reinterpret_cast<uintptr_t>(&__executable_start);
139 
140   // This file name is defined by extract_unwind_tables.gni.
141   static constexpr char kCfiFileName[] = "assets/unwind_cfi_32";
142   MemoryMappedFile::Region cfi_region;
143   int fd = base::android::OpenApkAsset(kCfiFileName, &cfi_region);
144   if (fd < 0)
145     return;
146   cfi_mmap_ = std::make_unique<MemoryMappedFile>();
147   // The CFI region starts at |cfi_region.offset|.
148   if (!cfi_mmap_->Initialize(base::File(fd), cfi_region))
149     return;
150 
151   ParseCFITables();
152   can_unwind_stack_frames_ = true;
153 }
154 
ParseCFITables()155 void CFIBacktraceAndroid::ParseCFITables() {
156   // The first 4 bytes in the file is the size of UNW_INDEX table.
157   static constexpr size_t kUnwIndexRowSize =
158       sizeof(*unw_index_function_col_) + sizeof(*unw_index_indices_col_);
159   size_t unw_index_size = 0;
160   memcpy(&unw_index_size, cfi_mmap_->data(), sizeof(unw_index_size));
161   DCHECK_EQ(0u, unw_index_size % kUnwIndexRowSize);
162   // UNW_INDEX table starts after 4 bytes.
163   unw_index_function_col_ =
164       reinterpret_cast<const uintptr_t*>(cfi_mmap_->data()) + 1;
165   unw_index_row_count_ = unw_index_size / kUnwIndexRowSize;
166   unw_index_indices_col_ = reinterpret_cast<const uint16_t*>(
167       unw_index_function_col_ + unw_index_row_count_);
168 
169   // The UNW_DATA table data is right after the end of UNW_INDEX table.
170   // Interpret the UNW_DATA table as an array of 2 byte numbers since the
171   // indexes we have from the UNW_INDEX table are in terms of 2 bytes.
172   unw_data_start_addr_ = unw_index_indices_col_ + unw_index_row_count_;
173 }
174 
Unwind(const void ** out_trace,size_t max_depth)175 size_t CFIBacktraceAndroid::Unwind(const void** out_trace, size_t max_depth) {
176   // This function walks the stack using the call frame information to find the
177   // return addresses of all the functions that belong to current binary in call
178   // stack. For each function the CFI table defines the offset of the previous
179   // call frame and offset where the return address is stored.
180   if (!can_unwind_stack_frames())
181     return 0;
182 
183   // Get the current register state. This register state can be taken at any
184   // point in the function and the unwind information would be for this point.
185   // Define local variables before trying to get the current PC and SP to make
186   // sure the register state obtained is consistent with each other.
187   uintptr_t pc = 0, sp = 0;
188   asm volatile("mov %0, pc" : "=r"(pc));
189   asm volatile("mov %0, sp" : "=r"(sp));
190 
191   // We can only unwind as long as the pc is within the chrome.so.
192   size_t depth = 0;
193   while (pc > executable_start_addr_ && pc <= executable_end_addr_ &&
194          depth < max_depth) {
195     out_trace[depth++] = reinterpret_cast<void*>(pc);
196     // The offset of function from the start of the chrome.so binary:
197     uintptr_t func_addr = pc - executable_start_addr_;
198     CFIRow cfi{};
199     if (!FindCFIRowForPC(func_addr, &cfi))
200       break;
201 
202     // The rules for unwinding using the CFI information are:
203     // SP_prev = SP_cur + cfa_offset and
204     // PC_prev = * (SP_prev - ra_offset).
205     sp = sp + cfi.cfa_offset;
206     memcpy(&pc, reinterpret_cast<uintptr_t*>(sp - cfi.ra_offset),
207            sizeof(uintptr_t));
208   }
209   return depth;
210 }
211 
FindCFIRowForPC(uintptr_t func_addr,CFIBacktraceAndroid::CFIRow * cfi)212 bool CFIBacktraceAndroid::FindCFIRowForPC(uintptr_t func_addr,
213                                           CFIBacktraceAndroid::CFIRow* cfi) {
214   auto* cache = GetThreadLocalCFICache();
215   *cfi = {0};
216   if (cache->Find(func_addr, cfi))
217     return true;
218 
219   // Consider each column of UNW_INDEX table as arrays of uintptr_t (function
220   // addresses) and uint16_t (indices). Define start and end iterator on the
221   // first column array (addresses) and use std::lower_bound() to binary search
222   // on this array to find the required function address.
223   static const uintptr_t* const unw_index_fn_end =
224       unw_index_function_col_ + unw_index_row_count_;
225   const uintptr_t* found =
226       std::lower_bound(unw_index_function_col_, unw_index_fn_end, func_addr);
227 
228   // If found is start, then the given function is not in the table. If the
229   // given pc is start of a function then we cannot unwind.
230   if (found == unw_index_function_col_ || *found == func_addr)
231     return false;
232 
233   // std::lower_bound() returns the iter that corresponds to the first address
234   // that is greater than the given address. So, the required iter is always one
235   // less than the value returned by std::lower_bound().
236   --found;
237   uintptr_t func_start_addr = *found;
238   size_t row_num = found - unw_index_function_col_;
239   uint16_t index = unw_index_indices_col_[row_num];
240   DCHECK_LE(func_start_addr, func_addr);
241   // If the index is CANT_UNWIND then we do not have unwind infomation for the
242   // function.
243   if (index == kCantUnwind)
244     return false;
245 
246   // The unwind data for the current function is at an offsset of the index
247   // found in UNW_INDEX table.
248   const uint16_t* unwind_data = unw_data_start_addr_ + index;
249   // The value of first 2 bytes is the CFI data row count for the function.
250   uint16_t row_count = 0;
251   memcpy(&row_count, unwind_data, sizeof(row_count));
252   // And the actual CFI rows start after 2 bytes from the |unwind_data|. Cast
253   // the data into an array of CFIUnwindDataRow since the struct is designed to
254   // represent each row. We should be careful to read only |row_count| number of
255   // elements in the array.
256   const CFIUnwindDataRow* function_data =
257       reinterpret_cast<const CFIUnwindDataRow*>(unwind_data + 1);
258 
259   // Iterate through the CFI rows of the function to find the row that gives
260   // offset for the given instruction address.
261   CFIUnwindDataRow cfi_row = {0, 0};
262   uint16_t ra_offset = 0;
263   for (uint16_t i = 0; i < row_count; ++i) {
264     CFIUnwindDataRow row;
265     memcpy(&row, function_data + i, sizeof(CFIUnwindDataRow));
266     // The return address of the function is the instruction that is not yet
267     // been executed. The cfi row specifies the unwind info before executing the
268     // given instruction. If the given address is equal to the instruction
269     // offset, then use the current row. Or use the row with highest address
270     // less than the given address.
271     if (row.addr_offset + func_start_addr > func_addr)
272       break;
273 
274     cfi_row = row;
275     // The ra offset of the last specified row should be used, if unspecified.
276     // So, keep updating the RA offset till we reach the correct CFI row.
277     // TODO(ssid): This should be fixed in the format and we should always
278     // output ra offset.
279     if (cfi_row.ra_offset())
280       ra_offset = cfi_row.ra_offset();
281   }
282   DCHECK_NE(0u, cfi_row.addr_offset);
283   *cfi = {cfi_row.cfa_offset(), ra_offset};
284   DCHECK(cfi->cfa_offset);
285   DCHECK(cfi->ra_offset);
286 
287   // safe to update since the cache is thread local.
288   cache->Add(func_addr, *cfi);
289   return true;
290 }
291 
GetThreadLocalCFICache()292 CFIBacktraceAndroid::CFICache* CFIBacktraceAndroid::GetThreadLocalCFICache() {
293   auto* cache = static_cast<CFICache*>(thread_local_cfi_cache_.Get());
294   if (!cache) {
295     cache = new CFICache();
296     thread_local_cfi_cache_.Set(cache);
297   }
298   return cache;
299 }
300 
Add(uintptr_t address,CFIRow cfi)301 void CFIBacktraceAndroid::CFICache::Add(uintptr_t address, CFIRow cfi) {
302   cache_[address % kLimit] = {address, cfi};
303 }
304 
Find(uintptr_t address,CFIRow * cfi)305 bool CFIBacktraceAndroid::CFICache::Find(uintptr_t address, CFIRow* cfi) {
306   if (cache_[address % kLimit].address == address) {
307     *cfi = cache_[address % kLimit].cfi;
308     return true;
309   }
310   return false;
311 }
312 
313 }  // namespace trace_event
314 }  // namespace base
315