1 // Copyright (c) 2017 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "base/android/orderfile/orderfile_instrumentation.h"
6 
7 #include <time.h>
8 #include <unistd.h>
9 
10 #include <atomic>
11 #include <cstdio>
12 #include <cstring>
13 #include <string>
14 #include <thread>
15 #include <vector>
16 
17 #include "base/android/library_loader/anchor_functions.h"
18 #include "base/android/orderfile/orderfile_buildflags.h"
19 #include "base/files/file.h"
20 #include "base/format_macros.h"
21 #include "base/logging.h"
22 #include "base/macros.h"
23 #include "base/strings/stringprintf.h"
24 #include "build/build_config.h"
25 
26 #if BUILDFLAG(DEVTOOLS_INSTRUMENTATION_DUMPING)
27 #include <sstream>
28 
29 #include "base/command_line.h"
30 #include "base/time/time.h"
31 #include "base/trace_event/memory_dump_manager.h"
32 #include "base/trace_event/memory_dump_provider.h"
33 #endif  // BUILDFLAG(DEVTOOLS_INSTRUMENTATION_DUMPING)
34 
35 #if !defined(ARCH_CPU_ARMEL)
36 #error Only supported on ARM.
37 #endif  // !defined(ARCH_CPU_ARMEL)
38 
39 // Must be applied to all functions within this file.
40 #define NO_INSTRUMENT_FUNCTION __attribute__((no_instrument_function))
41 
42 namespace base {
43 namespace android {
44 namespace orderfile {
45 
46 namespace {
47 // Constants used for StartDelayedDump().
48 constexpr int kDelayInSeconds = 30;
49 constexpr int kInitialDelayInSeconds = kPhases == 1 ? kDelayInSeconds : 5;
50 
51 #if BUILDFLAG(DEVTOOLS_INSTRUMENTATION_DUMPING)
52 // This is defined in content/public/common/content_switches.h, which is not
53 // accessible in ::base.
54 constexpr const char kProcessTypeSwitch[] = "type";
55 #endif  // BUILDFLAG(DEVTOOLS_INSTRUMENTATION_DUMPING)
56 
57 // These are large overestimates, which is not an issue, as the data is
58 // allocated in .bss, and on linux doesn't take any actual memory when it's not
59 // touched.
60 constexpr size_t kBitfieldSize = 1 << 22;
61 constexpr size_t kMaxTextSizeInBytes = kBitfieldSize * (4 * 32);
62 constexpr size_t kMaxElements = 1 << 20;
63 
64 // Data required to log reached offsets.
65 struct LogData {
66   std::atomic<uint32_t> offsets[kBitfieldSize];
67   std::atomic<size_t> ordered_offsets[kMaxElements];
68   std::atomic<size_t> index;
69 };
70 
71 LogData g_data[kPhases];
72 std::atomic<int> g_data_index;
73 
74 #if BUILDFLAG(DEVTOOLS_INSTRUMENTATION_DUMPING)
75 // Dump offsets when a memory dump is requested. Used only if
76 // switches::kDevtoolsInstrumentationDumping is set.
77 class OrderfileMemoryDumpHook : public base::trace_event::MemoryDumpProvider {
OnMemoryDump(const base::trace_event::MemoryDumpArgs & args,base::trace_event::ProcessMemoryDump * pmd)78   NO_INSTRUMENT_FUNCTION bool OnMemoryDump(
79       const base::trace_event::MemoryDumpArgs& args,
80       base::trace_event::ProcessMemoryDump* pmd) override {
81     // Disable instrumentation now to cut down on orderfile pollution.
82     if (!Disable()) {
83       return true;  // A dump has already been started.
84     }
85     std::stringstream process_type_str;
86     Dump(base::CommandLine::ForCurrentProcess()->GetSwitchValueASCII(
87         kProcessTypeSwitch));
88     return true;  // If something goes awry, a fatal error will be created
89                   // internally.
90   }
91 };
92 #endif  // BUILDFLAG(DEVTOOLS_INSTRUMENTATION_DUMPING)
93 
94 // |RecordAddress()| adds an element to a concurrent bitset and to a concurrent
95 // append-only list of offsets.
96 //
97 // Ordering:
98 // Two consecutive calls to |RecordAddress()| from the same thread will be
99 // ordered in the same way in the result, as written by
100 // |StopAndDumpToFile()|. The result will contain exactly one instance of each
101 // unique offset relative to |kStartOfText| passed to |RecordAddress()|.
102 //
103 // Implementation:
104 // The "set" part is implemented with a bitfield, |g_offset|. The insertion
105 // order is recorded in |g_ordered_offsets|.
106 // This is not a class to make sure there isn't a static constructor, as it
107 // would cause issue with an instrumented static constructor calling this code.
108 //
109 // Limitations:
110 // - Only records offsets to addresses between |kStartOfText| and |kEndOfText|.
111 // - Capacity of the set is limited by |kMaxElements|.
112 // - Some insertions at the end of collection may be lost.
113 
114 // Records that |address| has been reached, if recording is enabled.
115 // To avoid infinite recursion, this *must* *never* call any instrumented
116 // function, unless |Disable()| is called first.
117 template <bool for_testing>
RecordAddress(size_t address)118 __attribute__((always_inline, no_instrument_function)) void RecordAddress(
119     size_t address) {
120   int index = g_data_index.load(std::memory_order_relaxed);
121   if (index >= kPhases)
122     return;
123 
124   const size_t start =
125       for_testing ? kStartOfTextForTesting : base::android::kStartOfText;
126   const size_t end =
127       for_testing ? kEndOfTextForTesting : base::android::kEndOfText;
128   if (UNLIKELY(address < start || address > end)) {
129     Disable();
130     // If the start and end addresses are set incorrectly, this code path is
131     // likely happening during a static initializer. Logging at this time is
132     // prone to deadlock. By crashing immediately we at least have a chance to
133     // get a stack trace from the system to give some clue about the nature of
134     // the problem.
135     IMMEDIATE_CRASH();
136   }
137 
138   size_t offset = address - start;
139   static_assert(sizeof(int) == 4,
140                 "Collection and processing code assumes that sizeof(int) == 4");
141   size_t offset_index = offset / 4;
142 
143   auto* offsets = g_data[index].offsets;
144   // Atomically set the corresponding bit in the array.
145   std::atomic<uint32_t>* element = offsets + (offset_index / 32);
146   // First, a racy check. This saves a CAS if the bit is already set, and
147   // allows the cache line to remain shared acoss CPUs in this case.
148   uint32_t value = element->load(std::memory_order_relaxed);
149   uint32_t mask = 1 << (offset_index % 32);
150   if (value & mask)
151     return;
152 
153   auto before = element->fetch_or(mask, std::memory_order_relaxed);
154   if (before & mask)
155     return;
156 
157   // We were the first one to set the element, record it in the ordered
158   // elements list.
159   // Use relaxed ordering, as the value is not published, or used for
160   // synchronization.
161   auto* ordered_offsets = g_data[index].ordered_offsets;
162   auto& ordered_offsets_index = g_data[index].index;
163   size_t insertion_index =
164       ordered_offsets_index.fetch_add(1, std::memory_order_relaxed);
165   if (UNLIKELY(insertion_index >= kMaxElements)) {
166     Disable();
167     LOG(FATAL) << "Too many reached offsets";
168   }
169   ordered_offsets[insertion_index].store(offset, std::memory_order_relaxed);
170 }
171 
DumpToFile(const base::FilePath & path,const LogData & data)172 NO_INSTRUMENT_FUNCTION bool DumpToFile(const base::FilePath& path,
173                                        const LogData& data) {
174   auto file =
175       base::File(path, base::File::FLAG_CREATE_ALWAYS | base::File::FLAG_WRITE);
176   if (!file.IsValid()) {
177     PLOG(ERROR) << "Could not open " << path;
178     return false;
179   }
180 
181   if (data.index == 0) {
182     LOG(ERROR) << "No entries to dump";
183     return false;
184   }
185 
186   size_t count = data.index - 1;
187   for (size_t i = 0; i < count; i++) {
188     // |g_ordered_offsets| is initialized to 0, so a 0 in the middle of it
189     // indicates a case where the index was incremented, but the write is not
190     // visible in this thread yet. Safe to skip, also because the function at
191     // the start of text is never called.
192     auto offset = data.ordered_offsets[i].load(std::memory_order_relaxed);
193     if (!offset)
194       continue;
195     auto offset_str = base::StringPrintf("%" PRIuS "\n", offset);
196     if (file.WriteAtCurrentPos(offset_str.c_str(),
197                                static_cast<int>(offset_str.size())) < 0) {
198       // If the file could be opened, but writing has failed, it's likely that
199       // data was partially written. Producing incomplete profiling data would
200       // lead to a poorly performing orderfile, but might not be otherwised
201       // noticed. So we crash instead.
202       LOG(FATAL) << "Error writing profile data";
203     }
204   }
205   return true;
206 }
207 
208 // Stops recording, and outputs the data to |path|.
StopAndDumpToFile(int pid,uint64_t start_ns_since_epoch,const std::string & tag)209 NO_INSTRUMENT_FUNCTION void StopAndDumpToFile(int pid,
210                                               uint64_t start_ns_since_epoch,
211                                               const std::string& tag) {
212   Disable();
213 
214   for (int phase = 0; phase < kPhases; phase++) {
215     std::string tag_str;
216     if (!tag.empty())
217       tag_str = base::StringPrintf("%s-", tag.c_str());
218     auto path = base::StringPrintf(
219         "/data/local/tmp/chrome/orderfile/profile-hitmap-%s%d-%" PRIu64
220         ".txt_%d",
221         tag_str.c_str(), pid, start_ns_since_epoch, phase);
222     if (!DumpToFile(base::FilePath(path), g_data[phase])) {
223       LOG(ERROR) << "Problem with dump " << phase << " (" << tag << ")";
224     }
225   }
226 }
227 
228 }  // namespace
229 
Disable()230 NO_INSTRUMENT_FUNCTION bool Disable() {
231   auto old_phase = g_data_index.exchange(kPhases, std::memory_order_relaxed);
232   std::atomic_thread_fence(std::memory_order_seq_cst);
233   return old_phase != kPhases;
234 }
235 
SanityChecks()236 NO_INSTRUMENT_FUNCTION void SanityChecks() {
237   CHECK_LT(base::android::kEndOfText - base::android::kStartOfText,
238            kMaxTextSizeInBytes);
239   CHECK(base::android::IsOrderingSane());
240 }
241 
SwitchToNextPhaseOrDump(int pid,uint64_t start_ns_since_epoch)242 NO_INSTRUMENT_FUNCTION bool SwitchToNextPhaseOrDump(
243     int pid,
244     uint64_t start_ns_since_epoch) {
245   int before = g_data_index.fetch_add(1, std::memory_order_relaxed);
246   if (before + 1 == kPhases) {
247     StopAndDumpToFile(pid, start_ns_since_epoch, "");
248     return true;
249   }
250   return false;
251 }
252 
StartDelayedDump()253 NO_INSTRUMENT_FUNCTION void StartDelayedDump() {
254   // Using std::thread and not using base::TimeTicks() in order to to not call
255   // too many base:: symbols that would pollute the reached symbol dumps.
256   struct timespec ts;
257   if (clock_gettime(CLOCK_MONOTONIC, &ts))
258     PLOG(FATAL) << "clock_gettime.";
259   uint64_t start_ns_since_epoch =
260       static_cast<uint64_t>(ts.tv_sec) * 1000 * 1000 * 1000 + ts.tv_nsec;
261   int pid = getpid();
262 
263 #if BUILDFLAG(DEVTOOLS_INSTRUMENTATION_DUMPING)
264   static auto* g_orderfile_memory_dump_hook = new OrderfileMemoryDumpHook();
265   base::trace_event::MemoryDumpManager::GetInstance()->RegisterDumpProvider(
266       g_orderfile_memory_dump_hook, "Orderfile", nullptr);
267 #endif  // BUILDFLAG(DEVTOOLS_INSTRUMENTATION_DUMPING)
268 
269   std::thread([pid, start_ns_since_epoch]() {
270     sleep(kInitialDelayInSeconds);
271 #if BUILDFLAG(DEVTOOLS_INSTRUMENTATION_DUMPING)
272     SwitchToNextPhaseOrDump(pid, start_ns_since_epoch);
273 // Return, letting devtools tracing handle any post-startup phases.
274 #else
275     while (!SwitchToNextPhaseOrDump(pid, start_ns_since_epoch))
276       sleep(kDelayInSeconds);
277 #endif  // BUILDFLAG(DEVTOOLS_INSTRUMENTATION_DUMPING)
278   })
279       .detach();
280 }
281 
Dump(const std::string & tag)282 NO_INSTRUMENT_FUNCTION void Dump(const std::string& tag) {
283   // As profiling has been disabled, none of the uses of ::base symbols below
284   // will enter the symbol dump.
285   StopAndDumpToFile(
286       getpid(), (base::Time::Now() - base::Time::UnixEpoch()).InNanoseconds(),
287       tag);
288 }
289 
ResetForTesting()290 NO_INSTRUMENT_FUNCTION void ResetForTesting() {
291   Disable();
292   g_data_index = 0;
293   for (int i = 0; i < kPhases; i++) {
294     memset(reinterpret_cast<uint32_t*>(g_data[i].offsets), 0,
295            sizeof(uint32_t) * kBitfieldSize);
296     memset(reinterpret_cast<uint32_t*>(g_data[i].ordered_offsets), 0,
297            sizeof(uint32_t) * kMaxElements);
298     g_data[i].index.store(0);
299   }
300 }
301 
RecordAddressForTesting(size_t address)302 NO_INSTRUMENT_FUNCTION void RecordAddressForTesting(size_t address) {
303   return RecordAddress<true>(address);
304 }
305 
GetOrderedOffsetsForTesting()306 NO_INSTRUMENT_FUNCTION std::vector<size_t> GetOrderedOffsetsForTesting() {
307   std::vector<size_t> result;
308   size_t max_index = g_data[0].index.load(std::memory_order_relaxed);
309   for (size_t i = 0; i < max_index; ++i) {
310     auto value = g_data[0].ordered_offsets[i].load(std::memory_order_relaxed);
311     if (value)
312       result.push_back(value);
313   }
314   return result;
315 }
316 
317 }  // namespace orderfile
318 }  // namespace android
319 }  // namespace base
320 
321 extern "C" {
322 
__cyg_profile_func_enter_bare()323 NO_INSTRUMENT_FUNCTION void __cyg_profile_func_enter_bare() {
324   base::android::orderfile::RecordAddress<false>(
325       reinterpret_cast<size_t>(__builtin_return_address(0)));
326 }
327 
328 }  // extern "C"
329