/*
 * Copyright (C) 2015 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <inttypes.h>
#include <libgen.h>
#include <signal.h>
#include <sys/mman.h>
#include <sys/prctl.h>
#include <sys/utsname.h>
#include <time.h>
#include <unistd.h>
#include <set>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <vector>

#include <android-base/file.h>
#include <android-base/logging.h>
#include <android-base/parseint.h>
#include <android-base/strings.h>
#include <android-base/unique_fd.h>
#if defined(__ANDROID__)
#include <android-base/properties.h>
#endif

#include "CallChainJoiner.h"
#include "command.h"
#include "environment.h"
#include "ETMRecorder.h"
#include "event_selection_set.h"
#include "event_type.h"
#include "IOEventLoop.h"
#include "JITDebugReader.h"
#include "OfflineUnwinder.h"
#include "read_apk.h"
#include "read_elf.h"
#include "record.h"
#include "record_file.h"
#include "thread_tree.h"
#include "tracing.h"
#include "utils.h"
#include "workload.h"

using namespace simpleperf;
static std::string default_measured_event_type = "cpu-cycles";

static std::unordered_map<std::string, uint64_t> branch_sampling_type_map = {
    {"u", PERF_SAMPLE_BRANCH_USER},
    {"k", PERF_SAMPLE_BRANCH_KERNEL},
    {"any", PERF_SAMPLE_BRANCH_ANY},
    {"any_call", PERF_SAMPLE_BRANCH_ANY_CALL},
    {"any_ret", PERF_SAMPLE_BRANCH_ANY_RETURN},
    {"ind_call", PERF_SAMPLE_BRANCH_IND_CALL},
};

static std::unordered_map<std::string, int> clockid_map = {
    {"realtime", CLOCK_REALTIME},
    {"monotonic", CLOCK_MONOTONIC},
    {"monotonic_raw", CLOCK_MONOTONIC_RAW},
    {"boottime", CLOCK_BOOTTIME},
};
// The max size of a record dumped by the kernel is 65535 bytes, and the dump stack size
// must be a multiple of 8, so MAX_DUMP_STACK_SIZE is 65528.
constexpr uint32_t MAX_DUMP_STACK_SIZE = 65528;

// The max number of pages allowed in the mapped buffer is decided by rlimit(RLIMIT_MEMLOCK).
// 1024 pages is the desired value for the mapped buffer. If mapped successfully,
// the buffer size = 1024 * 4K (page size) = 4M.
constexpr size_t DESIRED_PAGES_IN_MAPPED_BUFFER = 1024;

// Cache size used by CallChainJoiner to cache call chains in memory.
constexpr size_t DEFAULT_CALL_CHAIN_JOINER_CACHE_SIZE = 8 * 1024 * 1024;

// Currently, the record buffer size in user-space is set to match the kernel buffer size on an
// 8-core system. For system-wide recording, it is 8K pages * 4K page_size * 8 cores = 256MB.
// For non-system-wide recording, it is 1K pages * 4K page_size * 8 cores = 64MB.
static constexpr size_t kRecordBufferSize = 64 * 1024 * 1024;
static constexpr size_t kSystemWideRecordBufferSize = 256 * 1024 * 1024;

static constexpr size_t kDefaultAuxBufferSize = 4 * 1024 * 1024;

// On Pixel 3, it takes about 1ms to enable ETM, and 16-40ms to disable ETM and copy 4M of ETM
// data. So set the default flush period to 100ms.
static constexpr double kDefaultEtmDataFlushPeriodInSec = 0.1;

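// Timestamps (in ns, taken from GetSystemClock()) at the boundaries of each recording phase.
// The deltas between them are logged at the end of PostProcessRecording().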
struct TimeStat {
  uint64_t prepare_recording_time = 0;
  uint64_t start_recording_time = 0;
  uint64_t stop_recording_time = 0;
  uint64_t finish_recording_time = 0;
  uint64_t post_process_time = 0;
};

class RecordCommand : public Command {
 public:
  RecordCommand()
      : Command(
            "record", "record sampling info in perf.data",
            // clang-format off
116 "Usage: simpleperf record [options] [--] [command [command-args]]\n"
117 "       Gather sampling information of running [command]. And -a/-p/-t option\n"
118 "       can be used to change target of sampling information.\n"
119 "       The default options are: -e cpu-cycles -f 4000 -o perf.data.\n"
120 "Select monitored threads:\n"
121 "-a     System-wide collection. Use with --exclude-perf to exclude samples for\n"
122 "       simpleperf process.\n"
123 #if defined(__ANDROID__)
124 "--app package_name    Profile the process of an Android application.\n"
125 "                      On non-rooted devices, the app must be debuggable,\n"
126 "                      because we use run-as to switch to the app's context.\n"
127 #endif
128 "-p pid1,pid2,...       Record events on existing processes. Mutually exclusive\n"
129 "                       with -a.\n"
130 "-t tid1,tid2,... Record events on existing threads. Mutually exclusive with -a.\n"
131 "--exclude-perf   Exclude samples for simpleperf process.\n"
132 "\n"
133 "Select monitored event types:\n"
134 "-e event1[:modifier1],event2[:modifier2],...\n"
135 "             Select a list of events to record. An event can be:\n"
136 "               1) an event name listed in `simpleperf list`;\n"
137 "               2) a raw PMU event in rN format. N is a hex number.\n"
138 "                  For example, r1b selects event number 0x1b.\n"
139 "             Modifiers can be added to define how the event should be\n"
140 "             monitored. Possible modifiers are:\n"
141 "                u - monitor user space events only\n"
142 "                k - monitor kernel space events only\n"
143 "--group event1[:modifier],event2[:modifier2],...\n"
144 "             Similar to -e option. But events specified in the same --group\n"
145 "             option are monitored as a group, and scheduled in and out at the\n"
146 "             same time.\n"
147 "--trace-offcpu   Generate samples when threads are scheduled off cpu.\n"
148 "                 Similar to \"-c 1 -e sched:sched_switch\".\n"
149 "\n"
150 "Select monitoring options:\n"
151 "-f freq      Set event sample frequency. It means recording at most [freq]\n"
152 "             samples every second. For non-tracepoint events, the default\n"
153 "             option is -f 4000. A -f/-c option affects all event types\n"
154 "             following it until meeting another -f/-c option. For example,\n"
155 "             for \"-f 1000 cpu-cycles -c 1 -e sched:sched_switch\", cpu-cycles\n"
156 "             has sample freq 1000, sched:sched_switch event has sample period 1.\n"
157 "-c count     Set event sample period. It means recording one sample when\n"
158 "             [count] events happen. For tracepoint events, the default option\n"
159 "             is -c 1.\n"
160 "--call-graph fp | dwarf[,<dump_stack_size>]\n"
161 "             Enable call graph recording. Use frame pointer or dwarf debug\n"
162 "             frame as the method to parse call graph in stack.\n"
163 "             Default is dwarf,65528.\n"
164 "-g           Same as '--call-graph dwarf'.\n"
165 "--clockid clock_id      Generate timestamps of samples using selected clock.\n"
166 "                        Possible values are: realtime, monotonic,\n"
167 "                        monotonic_raw, boottime, perf. If supported, default\n"
168 "                        is monotonic, otherwise is perf.\n"
169 "--cpu cpu_item1,cpu_item2,...\n"
170 "             Collect samples only on the selected cpus. cpu_item can be cpu\n"
171 "             number like 1, or cpu range like 0-3.\n"
172 "--duration time_in_sec  Monitor for time_in_sec seconds instead of running\n"
173 "                        [command]. Here time_in_sec may be any positive\n"
174 "                        floating point number.\n"
175 "-j branch_filter1,branch_filter2,...\n"
176 "             Enable taken branch stack sampling. Each sample captures a series\n"
177 "             of consecutive taken branches.\n"
178 "             The following filters are defined:\n"
179 "                any: any type of branch\n"
180 "                any_call: any function call or system call\n"
181 "                any_ret: any function return or system call return\n"
182 "                ind_call: any indirect branch\n"
183 "                u: only when the branch target is at the user level\n"
184 "                k: only when the branch target is in the kernel\n"
185 "             This option requires at least one branch type among any, any_call,\n"
186 "             any_ret, ind_call.\n"
187 "-b           Enable taken branch stack sampling. Same as '-j any'.\n"
188 "-m mmap_pages   Set the size of the buffer used to receiving sample data from\n"
189 "                the kernel. It should be a power of 2. If not set, the max\n"
190 "                possible value <= 1024 will be used.\n"
191 "--aux-buffer-size <buffer_size>  Set aux buffer size, only used in cs-etm event type.\n"
192 "                                 Need to be power of 2 and page size aligned.\n"
193 "                                 Used memory size is (buffer_size * (cpu_count + 1).\n"
194 "                                 Default is 4M.\n"
195 "--no-inherit  Don't record created child threads/processes.\n"
196 "--cpu-percent <percent>  Set the max percent of cpu time used for recording.\n"
197 "                         percent is in range [1-100], default is 25.\n"
198 "--include-filter binary1,binary2,...\n"
199 "                Trace only selected binaries in cs-etm instruction tracing.\n"
200 "                Each entry is a binary path.\n"
201 "\n"
202 "Dwarf unwinding options:\n"
203 "--post-unwind=(yes|no) If `--call-graph dwarf` option is used, then the user's\n"
204 "                       stack will be recorded in perf.data and unwound while\n"
205 "                       recording by default. Use --post-unwind=yes to switch\n"
206 "                       to unwind after recording.\n"
207 "--no-unwind   If `--call-graph dwarf` option is used, then the user's stack\n"
208 "              will be unwound by default. Use this option to disable the\n"
209 "              unwinding of the user's stack.\n"
210 "--no-callchain-joiner  If `--call-graph dwarf` option is used, then by default\n"
211 "                       callchain joiner is used to break the 64k stack limit\n"
212 "                       and build more complete call graphs. However, the built\n"
213 "                       call graphs may not be correct in all cases.\n"
214 "--callchain-joiner-min-matching-nodes count\n"
215 "               When callchain joiner is used, set the matched nodes needed to join\n"
216 "               callchains. The count should be >= 1. By default it is 1.\n"
217 "--no-cut-samples   Simpleperf uses a record buffer to cache records received from the kernel.\n"
218 "                   When the available space in the buffer reaches low level, it cuts part of\n"
219 "                   the stack data in samples. When the available space reaches critical level,\n"
220 "                   it drops all samples. This option makes simpleperf not cut samples when the\n"
221 "                   available space reaches low level.\n"
222 "\n"
223 "Recording file options:\n"
224 "--no-dump-kernel-symbols  Don't dump kernel symbols in perf.data. By default\n"
225 "                          kernel symbols will be dumped when needed.\n"
226 "--no-dump-symbols       Don't dump symbols in perf.data. By default symbols are\n"
227 "                        dumped in perf.data, to support reporting in another\n"
228 "                        environment.\n"
229 "-o record_file_name    Set record file name, default is perf.data.\n"
230 "--size-limit SIZE[K|M|G]      Stop recording after SIZE bytes of records.\n"
231 "                              Default is unlimited.\n"
232 "--symfs <dir>    Look for files with symbols relative to this directory.\n"
233 "                 This option is used to provide files with symbol table and\n"
234 "                 debug information, which are used for unwinding and dumping symbols.\n"
235 "\n"
236 "Other options:\n"
237 "--exit-with-parent            Stop recording when the process starting\n"
238 "                              simpleperf dies.\n"
239 "--start_profiling_fd fd_no    After starting profiling, write \"STARTED\" to\n"
240 "                              <fd_no>, then close <fd_no>.\n"
241 "--stdio-controls-profiling    Use stdin/stdout to pause/resume profiling.\n"
242 #if defined(__ANDROID__)
243 "--in-app                      We are already running in the app's context.\n"
244 "--tracepoint-events file_name   Read tracepoint events from [file_name] instead of tracefs.\n"
245 #endif
246 #if 0
247 // Below options are only used internally and shouldn't be visible to the public.
248 "--out-fd <fd>    Write perf.data to a file descriptor.\n"
249 "--stop-signal-fd <fd>  Stop recording when fd is readable.\n"
250 #endif
251             // clang-format on
            ),
        system_wide_collection_(false),
        branch_sampling_(0),
        fp_callchain_sampling_(false),
        dwarf_callchain_sampling_(false),
        dump_stack_size_in_dwarf_sampling_(MAX_DUMP_STACK_SIZE),
        unwind_dwarf_callchain_(true),
        post_unwind_(false),
        child_inherit_(true),
        duration_in_sec_(0),
        can_dump_kernel_symbols_(true),
        dump_symbols_(true),
        event_selection_set_(false),
        mmap_page_range_(std::make_pair(1, DESIRED_PAGES_IN_MAPPED_BUFFER)),
        record_filename_("perf.data"),
        sample_record_count_(0),
        lost_record_count_(0),
        in_app_context_(false),
        trace_offcpu_(false),
        exclude_kernel_callchain_(false),
        allow_callchain_joiner_(true),
        callchain_joiner_min_matching_nodes_(1u),
        last_record_timestamp_(0u) {
    // If we run `adb shell simpleperf record xxx` and stop profiling with ctrl-c, adb closes the
    // sockets connected to simpleperf. After that, simpleperf will receive SIGPIPE when writing
    // to stdout/stderr, which is a problem when we use the '--app' option. So ignore SIGPIPE to
    // finish properly.
    signal(SIGPIPE, SIG_IGN);
  }

  bool Run(const std::vector<std::string>& args);

 private:
  bool ParseOptions(const std::vector<std::string>& args,
                    std::vector<std::string>* non_option_args);
  bool AdjustPerfEventLimit();
  bool PrepareRecording(Workload* workload);
  bool DoRecording(Workload* workload);
  bool PostProcessRecording(const std::vector<std::string>& args);
  bool TraceOffCpu();
  bool SetEventSelectionFlags();
  bool CreateAndInitRecordFile();
  std::unique_ptr<RecordFileWriter> CreateRecordFile(
      const std::string& filename);
  bool DumpKernelSymbol();
  bool DumpTracingData();
  bool DumpKernelMaps();
  bool DumpUserSpaceMaps();
  bool DumpProcessMaps(pid_t pid, const std::unordered_set<pid_t>& tids);
  bool DumpAuxTraceInfo();
  bool ProcessRecord(Record* record);
  bool ShouldOmitRecord(Record* record);
  bool DumpMapsForRecord(Record* record);
  bool SaveRecordForPostUnwinding(Record* record);
  bool SaveRecordAfterUnwinding(Record* record);
  bool SaveRecordWithoutUnwinding(Record* record);
  bool ProcessJITDebugInfo(const std::vector<JITDebugInfo>& debug_info, bool sync_kernel_records);
  bool ProcessControlCmd(IOEventLoop* loop);

  void UpdateRecord(Record* record);
  bool UnwindRecord(SampleRecord& r);
  bool PostUnwindRecords();
  bool JoinCallChains();
  bool DumpAdditionalFeatures(const std::vector<std::string>& args);
  bool DumpBuildIdFeature();
  bool DumpFileFeature();
  bool DumpMetaInfoFeature(bool kernel_symbols_available);
  void CollectHitFileInfo(const SampleRecord& r);

  std::unique_ptr<SampleSpeed> sample_speed_;
  bool system_wide_collection_;
  uint64_t branch_sampling_;
  bool fp_callchain_sampling_;
  bool dwarf_callchain_sampling_;
  uint32_t dump_stack_size_in_dwarf_sampling_;
  bool unwind_dwarf_callchain_;
  bool post_unwind_;
  std::unique_ptr<OfflineUnwinder> offline_unwinder_;
  bool child_inherit_;
  double duration_in_sec_;
  bool can_dump_kernel_symbols_;
  bool dump_symbols_;
  std::string clockid_;
  std::vector<int> cpus_;
  EventSelectionSet event_selection_set_;

  std::pair<size_t, size_t> mmap_page_range_;
  size_t aux_buffer_size_ = kDefaultAuxBufferSize;

  ThreadTree thread_tree_;
  std::string record_filename_;
  android::base::unique_fd out_fd_;
  std::unique_ptr<RecordFileWriter> record_file_writer_;
  android::base::unique_fd stop_signal_fd_;

  uint64_t sample_record_count_;
  uint64_t lost_record_count_;
  android::base::unique_fd start_profiling_fd_;
  bool stdio_controls_profiling_ = false;

  std::string app_package_name_;
  bool in_app_context_;
  bool trace_offcpu_;
  bool exclude_kernel_callchain_;
  uint64_t size_limit_in_bytes_ = 0;
  uint64_t max_sample_freq_ = DEFAULT_SAMPLE_FREQ_FOR_NONTRACEPOINT_EVENT;
  size_t cpu_time_max_percent_ = 25;

  // For CallChainJoiner
  bool allow_callchain_joiner_;
  size_t callchain_joiner_min_matching_nodes_;
  std::unique_ptr<CallChainJoiner> callchain_joiner_;
  bool allow_cutting_samples_ = true;

  std::unique_ptr<JITDebugReader> jit_debug_reader_;
  uint64_t last_record_timestamp_;  // used to insert Mmap2Records for JIT debug info
  TimeStat time_stat_;
  EventAttrWithId dumping_attr_id_;
  // In system wide recording, record if we have dumped map info for a process.
  std::unordered_set<pid_t> dumped_processes_;
  bool exclude_perf_ = false;
};

bool RecordCommand::Run(const std::vector<std::string>& args) {
  ScopedCurrentArch scoped_arch(GetMachineArch());
  if (!CheckPerfEventLimit()) {
    return false;
  }
  AllowMoreOpenedFiles();

  std::vector<std::string> workload_args;
  if (!ParseOptions(args, &workload_args)) {
    return false;
  }
  if (!AdjustPerfEventLimit()) {
    return false;
  }
  ScopedTempFiles scoped_temp_files(android::base::Dirname(record_filename_));
  if (!app_package_name_.empty() && !in_app_context_) {
    // Some users want to profile non-debuggable apps on rooted devices. But run-as only works
    // for debuggable apps. So don't switch to the app's context when we are root.
    if (!IsRoot()) {
      return RunInAppContext(app_package_name_, "record", args, workload_args.size(),
                             record_filename_, true);
    }
  }
  std::unique_ptr<Workload> workload;
  if (!workload_args.empty()) {
    workload = Workload::CreateWorkload(workload_args);
    if (workload == nullptr) {
      return false;
    }
  }
  time_stat_.prepare_recording_time = GetSystemClock();
  if (!PrepareRecording(workload.get())) {
    return false;
  }
  time_stat_.start_recording_time = GetSystemClock();
  if (!DoRecording(workload.get())) {
    return false;
  }
  return PostProcessRecording(args);
}

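// Sets up everything needed before the record loop runs: events, monitored targets, mapped
// buffers, the output file, and the IOEventLoop callbacks. The numbered steps below follow the
// natural ordering: for example, perf event files must be opened before their buffers can be
// mapped, and perf.data can only be created once the event attrs are known.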
bool RecordCommand::PrepareRecording(Workload* workload) {
  // 1. Prepare in other modules.
  PrepareVdsoFile();

  // 2. Add default event type.
  if (event_selection_set_.empty()) {
    size_t group_id;
    if (!event_selection_set_.AddEventType(default_measured_event_type, &group_id)) {
      return false;
    }
    if (sample_speed_) {
      event_selection_set_.SetSampleSpeed(group_id, *sample_speed_);
    }
  }

  // 3. Process options before opening perf event files.
  exclude_kernel_callchain_ = event_selection_set_.ExcludeKernel();
  if (trace_offcpu_ && !TraceOffCpu()) {
    return false;
  }
  if (!SetEventSelectionFlags()) {
    return false;
  }
  if (unwind_dwarf_callchain_) {
    offline_unwinder_ = OfflineUnwinder::Create(false);
  }
  if (unwind_dwarf_callchain_ && allow_callchain_joiner_) {
    callchain_joiner_.reset(new CallChainJoiner(DEFAULT_CALL_CHAIN_JOINER_CACHE_SIZE,
                                                callchain_joiner_min_matching_nodes_,
                                                false));
  }

  // 4. Add monitored targets.
  bool need_to_check_targets = false;
  if (system_wide_collection_) {
    event_selection_set_.AddMonitoredThreads({-1});
  } else if (!event_selection_set_.HasMonitoredTarget()) {
    if (workload != nullptr) {
      event_selection_set_.AddMonitoredProcesses({workload->GetPid()});
      event_selection_set_.SetEnableOnExec(true);
    } else if (!app_package_name_.empty()) {
      // If the app process hasn't been created, wait for it. This allows simpleperf to start
      // before the app process, which gives better support for profiling app start-up time.
      std::set<pid_t> pids = WaitForAppProcesses(app_package_name_);
      event_selection_set_.AddMonitoredProcesses(pids);
      need_to_check_targets = true;
    } else {
      LOG(ERROR)
          << "No threads to monitor. Try `simpleperf help record` for help";
      return false;
    }
  } else {
    need_to_check_targets = true;
  }
  // Profiling JITed/interpreted Java code is supported starting from Android P.
  // Profiling the ART interpreter is also supported on host.
  if (GetAndroidVersion() >= kAndroidVersionP || GetAndroidVersion() == 0) {
    // JIT symfiles are stored in temporary files, and are deleted after recording. But if the
    // `-g --no-unwind` option is used, we want to keep symfiles to support unwinding in the
    // debug-unwind cmd.
    bool keep_symfiles = dwarf_callchain_sampling_ && !unwind_dwarf_callchain_;
    bool sync_with_records = clockid_ == "monotonic";
    jit_debug_reader_.reset(new JITDebugReader(keep_symfiles, sync_with_records));
    // To profile Java code, we need to dump maps containing vdex files, which are not executable.
    event_selection_set_.SetRecordNotExecutableMaps(true);
  }

  // 5. Open perf event files and create mapped buffers.
  if (!event_selection_set_.OpenEventFiles(cpus_)) {
    return false;
  }
  size_t record_buffer_size = system_wide_collection_ ? kSystemWideRecordBufferSize
                                                      : kRecordBufferSize;
  if (!event_selection_set_.MmapEventFiles(mmap_page_range_.first, mmap_page_range_.second,
                                           aux_buffer_size_, record_buffer_size,
                                           allow_cutting_samples_, exclude_perf_)) {
    return false;
  }
  auto callback =
      std::bind(&RecordCommand::ProcessRecord, this, std::placeholders::_1);
  if (!event_selection_set_.PrepareToReadMmapEventData(callback)) {
    return false;
  }

  // 6. Create perf.data.
  if (!CreateAndInitRecordFile()) {
    return false;
  }

  // 7. Add read/signal/periodic Events.
  if (need_to_check_targets && !event_selection_set_.StopWhenNoMoreTargets()) {
    return false;
  }
  IOEventLoop* loop = event_selection_set_.GetIOEventLoop();
  auto exit_loop_callback = [loop]() {
    return loop->ExitLoop();
  };
  if (!loop->AddSignalEvents({SIGCHLD, SIGINT, SIGTERM}, exit_loop_callback)) {
    return false;
  }

  // Only add an event for SIGHUP if we didn't inherit SIG_IGN (e.g. from nohup).
  if (!SignalIsIgnored(SIGHUP)) {
    if (!loop->AddSignalEvent(SIGHUP, exit_loop_callback)) {
      return false;
    }
  }
  if (stop_signal_fd_ != -1) {
    if (!loop->AddReadEvent(stop_signal_fd_, exit_loop_callback)) {
      return false;
    }
  }

  if (duration_in_sec_ != 0) {
    if (!loop->AddPeriodicEvent(SecondToTimeval(duration_in_sec_),
                                [loop]() { return loop->ExitLoop(); })) {
      return false;
    }
  }
  if (stdio_controls_profiling_) {
    if (!loop->AddReadEvent(0, [this, loop]() { return ProcessControlCmd(loop); })) {
      return false;
    }
  }
  if (jit_debug_reader_) {
    auto callback = [this](const std::vector<JITDebugInfo>& debug_info, bool sync_kernel_records) {
      return ProcessJITDebugInfo(debug_info, sync_kernel_records);
    };
    if (!jit_debug_reader_->RegisterDebugInfoCallback(loop, callback)) {
      return false;
    }
    if (!system_wide_collection_) {
      std::set<pid_t> pids = event_selection_set_.GetMonitoredProcesses();
      for (pid_t tid : event_selection_set_.GetMonitoredThreads()) {
        pid_t pid;
        if (GetProcessForThread(tid, &pid)) {
          pids.insert(pid);
        }
      }
      for (pid_t pid : pids) {
        if (!jit_debug_reader_->MonitorProcess(pid)) {
          return false;
        }
      }
      if (!jit_debug_reader_->ReadAllProcesses()) {
        return false;
      }
    }
  }
  if (event_selection_set_.HasAuxTrace()) {
    // ETM data is dumped to the kernel buffer only when there is no thread traced by ETM. That
    // happens either when all monitored threads are scheduled off cpu, or when all ETM perf
    // events are disabled.
    // If ETM data isn't dumped to the kernel buffer in time, the overflowed part is dropped,
    // resulting in less data than expected, especially in system-wide recording. So add a
    // periodic event to flush ETM data by temporarily disabling all perf events.
    auto etm_flush = [this]() {
      return event_selection_set_.SetEnableEvents(false) &&
             event_selection_set_.SetEnableEvents(true);
    };
    if (!loop->AddPeriodicEvent(SecondToTimeval(kDefaultEtmDataFlushPeriodInSec), etm_flush)) {
      return false;
    }
  }
  return true;
}

bool RecordCommand::DoRecording(Workload* workload) {
  // Write records in mapped buffers of perf_event_files to output file while workload is running.
  if (workload != nullptr && !workload->IsStarted() && !workload->Start()) {
    return false;
  }
  if (start_profiling_fd_.get() != -1) {
    if (!android::base::WriteStringToFd("STARTED", start_profiling_fd_)) {
      PLOG(ERROR) << "failed to write to start_profiling_fd_";
    }
    start_profiling_fd_.reset();
  }
  if (stdio_controls_profiling_) {
    printf("started\n");
    fflush(stdout);
  }
  if (!event_selection_set_.GetIOEventLoop()->RunLoop()) {
    return false;
  }
  time_stat_.stop_recording_time = GetSystemClock();
  if (!event_selection_set_.FinishReadMmapEventData()) {
    return false;
  }
  time_stat_.finish_recording_time = GetSystemClock();
  return true;
}

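// Copies the finished record file to out_fd and removes the temporary file. This supports the
// internal --out-fd option, which lets a controlling process receive the perf.data contents
// through a file descriptor instead of a file path.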
static bool WriteRecordDataToOutFd(const std::string& in_filename, android::base::unique_fd out_fd) {
  android::base::unique_fd in_fd(FileHelper::OpenReadOnly(in_filename));
  if (in_fd == -1) {
    PLOG(ERROR) << "Failed to open " << in_filename;
    return false;
  }
  char buf[8192];
  while (true) {
    ssize_t n = TEMP_FAILURE_RETRY(read(in_fd, buf, sizeof(buf)));
    if (n < 0) {
      PLOG(ERROR) << "Failed to read " << in_filename;
      return false;
    }
    if (n == 0) {
      break;
    }
    if (!android::base::WriteFully(out_fd, buf, n)) {
      PLOG(ERROR) << "Failed to write to out_fd";
      return false;
    }
  }
  unlink(in_filename.c_str());
  return true;
}

bool RecordCommand::PostProcessRecording(const std::vector<std::string>& args) {
  // 1. Post unwind dwarf callchain.
  if (unwind_dwarf_callchain_ && post_unwind_) {
    if (!PostUnwindRecords()) {
      return false;
    }
  }

  // 2. Optionally join callchains.
  if (callchain_joiner_) {
    JoinCallChains();
  }

  // 3. Dump additional features, and close record file.
  if (!DumpAdditionalFeatures(args)) {
    return false;
  }
  if (!record_file_writer_->Close()) {
    return false;
  }
  if (out_fd_ != -1 && !WriteRecordDataToOutFd(record_filename_, std::move(out_fd_))) {
    return false;
  }
  time_stat_.post_process_time = GetSystemClock();

  // 4. Show brief record result.
  auto record_stat = event_selection_set_.GetRecordStat();
  if (event_selection_set_.HasAuxTrace()) {
    LOG(INFO) << "Aux data traced: " << record_stat.aux_data_size;
    if (record_stat.lost_aux_data_size != 0) {
      LOG(INFO) << "Aux data lost in user space: " << record_stat.lost_aux_data_size;
    }
  } else {
    std::string cut_samples;
    if (record_stat.cut_stack_samples > 0) {
      cut_samples = android::base::StringPrintf(" (cut %zu)", record_stat.cut_stack_samples);
    }
    lost_record_count_ += record_stat.lost_samples + record_stat.lost_non_samples;
    LOG(INFO) << "Samples recorded: " << sample_record_count_ << cut_samples
              << ". Samples lost: " << lost_record_count_ << ".";
    LOG(DEBUG) << "In user space, dropped " << record_stat.lost_samples << " samples, "
               << record_stat.lost_non_samples << " non samples, cut stack of "
               << record_stat.cut_stack_samples << " samples.";
    if (sample_record_count_ + lost_record_count_ != 0) {
      double lost_percent =
          static_cast<double>(lost_record_count_) / (lost_record_count_ + sample_record_count_);
      constexpr double LOST_PERCENT_WARNING_BAR = 0.1;
      if (lost_percent >= LOST_PERCENT_WARNING_BAR) {
        LOG(WARNING) << "Lost " << (lost_percent * 100) << "% of samples, "
                     << "consider increasing mmap_pages(-m), "
                     << "or decreasing sample frequency(-f), "
                     << "or increasing sample period(-c).";
      }
    }
    if (callchain_joiner_) {
      callchain_joiner_->DumpStat();
    }
  }
  LOG(DEBUG) << "Prepare recording time "
      << (time_stat_.start_recording_time - time_stat_.prepare_recording_time) / 1e6
      << " ms, recording time "
      << (time_stat_.stop_recording_time - time_stat_.start_recording_time) / 1e6
      << " ms, stop recording time "
      << (time_stat_.finish_recording_time - time_stat_.stop_recording_time) / 1e6
      << " ms, post process time "
      << (time_stat_.post_process_time - time_stat_.finish_recording_time) / 1e6 << " ms.";
  return true;
}

bool RecordCommand::ParseOptions(const std::vector<std::string>& args,
                                 std::vector<std::string>* non_option_args) {
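  // Event groups added by -e/--group before any -f/-c option has been parsed; their sample
  // speed is filled in when the next -f/-c option arrives (see the "-c"/"-f" branch below).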
  std::vector<size_t> wait_setting_speed_event_groups_;
  size_t i;
  for (i = 0; i < args.size() && !args[i].empty() && args[i][0] == '-'; ++i) {
    if (args[i] == "-a") {
      system_wide_collection_ = true;
    } else if (args[i] == "--app") {
      if (!NextArgumentOrError(args, &i)) {
        return false;
      }
      app_package_name_ = args[i];
    } else if (args[i] == "--aux-buffer-size") {
      if (!GetUintOption(args, &i, &aux_buffer_size_, 0, std::numeric_limits<size_t>::max(),
                         true)) {
        return false;
      }
      if (!IsPowerOfTwo(aux_buffer_size_) || aux_buffer_size_ % sysconf(_SC_PAGE_SIZE)) {
        LOG(ERROR) << "invalid aux buffer size: " << args[i];
        return false;
      }
    } else if (args[i] == "-b") {
      branch_sampling_ = branch_sampling_type_map["any"];
    } else if (args[i] == "-c" || args[i] == "-f") {
      uint64_t value;
      if (!GetUintOption(args, &i, &value, 1)) {
        return false;
      }
      if (args[i-1] == "-c") {
        sample_speed_.reset(new SampleSpeed(0, value));
      } else {
        if (value >= INT_MAX) {
          LOG(ERROR) << "sample freq can't be bigger than INT_MAX.";
          return false;
        }
        sample_speed_.reset(new SampleSpeed(value, 0));
        max_sample_freq_ = std::max(max_sample_freq_, value);
      }
      for (auto group_id : wait_setting_speed_event_groups_) {
        event_selection_set_.SetSampleSpeed(group_id, *sample_speed_);
      }
      wait_setting_speed_event_groups_.clear();

    } else if (args[i] == "--call-graph") {
      if (!NextArgumentOrError(args, &i)) {
        return false;
      }
      std::vector<std::string> strs = android::base::Split(args[i], ",");
      if (strs[0] == "fp") {
        fp_callchain_sampling_ = true;
        dwarf_callchain_sampling_ = false;
      } else if (strs[0] == "dwarf") {
        fp_callchain_sampling_ = false;
        dwarf_callchain_sampling_ = true;
        if (strs.size() > 1) {
          uint64_t size;
          if (!android::base::ParseUint(strs[1], &size)) {
            LOG(ERROR) << "invalid dump stack size in --call-graph option: " << strs[1];
            return false;
          }
          if ((size & 7) != 0) {
            LOG(ERROR) << "dump stack size " << size
                       << " is not 8-byte aligned.";
            return false;
          }
          if (size >= MAX_DUMP_STACK_SIZE) {
            LOG(ERROR) << "dump stack size " << size
                       << " is bigger than max allowed size "
                       << MAX_DUMP_STACK_SIZE << ".";
            return false;
          }
          dump_stack_size_in_dwarf_sampling_ = static_cast<uint32_t>(size);
        }
      } else {
        LOG(ERROR) << "unexpected argument for --call-graph option: "
                   << args[i];
        return false;
      }
    } else if (args[i] == "--clockid") {
      if (!NextArgumentOrError(args, &i)) {
        return false;
      }
      if (args[i] != "perf") {
        if (!IsSettingClockIdSupported()) {
          LOG(ERROR) << "Setting clockid is not supported by the kernel.";
          return false;
        }
        if (clockid_map.find(args[i]) == clockid_map.end()) {
          LOG(ERROR) << "Invalid clockid: " << args[i];
          return false;
        }
      }
      clockid_ = args[i];
    } else if (args[i] == "--cpu") {
      if (!NextArgumentOrError(args, &i)) {
        return false;
      }
      cpus_ = GetCpusFromString(args[i]);
    } else if (args[i] == "--cpu-percent") {
      if (!GetUintOption(args, &i, &cpu_time_max_percent_, 1, 100)) {
        return false;
      }
    } else if (args[i] == "--duration") {
      if (!GetDoubleOption(args, &i, &duration_in_sec_, 1e-9)) {
        return false;
      }
    } else if (args[i] == "-e") {
      if (!NextArgumentOrError(args, &i)) {
        return false;
      }
      std::vector<std::string> event_types = android::base::Split(args[i], ",");
      for (auto& event_type : event_types) {
        size_t group_id;
        if (!event_selection_set_.AddEventType(event_type, &group_id)) {
          return false;
        }
        if (sample_speed_) {
          event_selection_set_.SetSampleSpeed(group_id, *sample_speed_);
        } else {
          wait_setting_speed_event_groups_.push_back(group_id);
        }
      }
    } else if (args[i] == "--exclude-perf") {
      exclude_perf_ = true;
    } else if (args[i] == "--exit-with-parent") {
      prctl(PR_SET_PDEATHSIG, SIGHUP, 0, 0, 0);
    } else if (args[i] == "-g") {
      fp_callchain_sampling_ = false;
      dwarf_callchain_sampling_ = true;
    } else if (args[i] == "--group") {
      if (!NextArgumentOrError(args, &i)) {
        return false;
      }
      std::vector<std::string> event_types = android::base::Split(args[i], ",");
      size_t group_id;
      if (!event_selection_set_.AddEventGroup(event_types, &group_id)) {
        return false;
      }
      if (sample_speed_) {
        event_selection_set_.SetSampleSpeed(group_id, *sample_speed_);
      } else {
        wait_setting_speed_event_groups_.push_back(group_id);
      }
    } else if (args[i] == "--in-app") {
      in_app_context_ = true;
    } else if (args[i] == "--include-filter") {
      if (!NextArgumentOrError(args, &i)) {
        return false;
      }
      event_selection_set_.SetIncludeFilters(android::base::Split(args[i], ","));
    } else if (args[i] == "-j") {
      if (!NextArgumentOrError(args, &i)) {
        return false;
      }
      std::vector<std::string> branch_sampling_types =
          android::base::Split(args[i], ",");
      for (auto& type : branch_sampling_types) {
        auto it = branch_sampling_type_map.find(type);
        if (it == branch_sampling_type_map.end()) {
          LOG(ERROR) << "unrecognized branch sampling filter: " << type;
          return false;
        }
        branch_sampling_ |= it->second;
      }
    } else if (args[i] == "-m") {
      uint64_t pages;
      if (!GetUintOption(args, &i, &pages)) {
        return false;
      }
      if (!IsPowerOfTwo(pages)) {
        LOG(ERROR) << "Invalid mmap_pages: '" << args[i] << "'";
        return false;
      }
      mmap_page_range_.first = mmap_page_range_.second = pages;
    } else if (args[i] == "--no-dump-kernel-symbols") {
      can_dump_kernel_symbols_ = false;
    } else if (args[i] == "--no-dump-symbols") {
      dump_symbols_ = false;
    } else if (args[i] == "--no-inherit") {
      child_inherit_ = false;
    } else if (args[i] == "--no-unwind") {
      unwind_dwarf_callchain_ = false;
    } else if (args[i] == "--no-callchain-joiner") {
      allow_callchain_joiner_ = false;
    } else if (args[i] == "--callchain-joiner-min-matching-nodes") {
      if (!GetUintOption(args, &i, &callchain_joiner_min_matching_nodes_, 1)) {
        return false;
      }
    } else if (args[i] == "--no-cut-samples") {
      allow_cutting_samples_ = false;
    } else if (args[i] == "-o") {
      if (!NextArgumentOrError(args, &i)) {
        return false;
      }
      record_filename_ = args[i];
    } else if (args[i] == "--out-fd") {
      int fd;
      if (!GetUintOption(args, &i, &fd)) {
        return false;
      }
      out_fd_.reset(fd);
    } else if (args[i] == "-p") {
      if (!NextArgumentOrError(args, &i)) {
        return false;
      }
      std::set<pid_t> pids;
      if (!GetValidThreadsFromThreadString(args[i], &pids)) {
        return false;
      }
      event_selection_set_.AddMonitoredProcesses(pids);
    } else if (android::base::StartsWith(args[i], "--post-unwind")) {
      if (args[i] == "--post-unwind" || args[i] == "--post-unwind=yes") {
        post_unwind_ = true;
      } else if (args[i] == "--post-unwind=no") {
        post_unwind_ = false;
      } else {
        LOG(ERROR) << "unexpected option " << args[i];
        return false;
      }
    } else if (args[i] == "--size-limit") {
      if (!GetUintOption(args, &i, &size_limit_in_bytes_, 1, std::numeric_limits<uint64_t>::max(),
                         true)) {
        return false;
      }
    } else if (args[i] == "--start_profiling_fd") {
      int fd;
      if (!GetUintOption(args, &i, &fd)) {
        return false;
      }
      start_profiling_fd_.reset(fd);
    } else if (args[i] == "--stdio-controls-profiling") {
      stdio_controls_profiling_ = true;
    } else if (args[i] == "--stop-signal-fd") {
      int fd;
      if (!GetUintOption(args, &i, &fd)) {
        return false;
      }
      stop_signal_fd_.reset(fd);
    } else if (args[i] == "--symfs") {
      if (!NextArgumentOrError(args, &i)) {
        return false;
      }
      if (!Dso::SetSymFsDir(args[i])) {
        return false;
      }
    } else if (args[i] == "-t") {
      if (!NextArgumentOrError(args, &i)) {
        return false;
      }
      std::set<pid_t> tids;
      if (!GetValidThreadsFromThreadString(args[i], &tids)) {
        return false;
      }
      event_selection_set_.AddMonitoredThreads(tids);
    } else if (args[i] == "--trace-offcpu") {
      trace_offcpu_ = true;
    } else if (args[i] == "--tracepoint-events") {
      if (!NextArgumentOrError(args, &i)) {
        return false;
      }
      if (!SetTracepointEventsFilePath(args[i])) {
        return false;
      }
    } else if (args[i] == "--") {
      i++;
      break;
    } else {
      ReportUnknownOption(args, i);
      return false;
    }
  }

  if (!dwarf_callchain_sampling_) {
    if (!unwind_dwarf_callchain_) {
      LOG(ERROR)
          << "--no-unwind is only used with `--call-graph dwarf` option.";
      return false;
    }
    unwind_dwarf_callchain_ = false;
  }
  if (post_unwind_) {
    if (!dwarf_callchain_sampling_ || !unwind_dwarf_callchain_) {
      post_unwind_ = false;
    }
  }

  if (fp_callchain_sampling_) {
    if (GetBuildArch() == ARCH_ARM) {
      LOG(WARNING) << "`--call-graph fp` option doesn't work well on arm architecture, "
                   << "consider using `-g` option or profiling on aarch64 architecture.";
    }
  }

  if (system_wide_collection_ && event_selection_set_.HasMonitoredTarget()) {
    LOG(ERROR) << "Record system wide and existing processes/threads can't be "
                  "used at the same time.";
    return false;
  }

  if (system_wide_collection_ && !IsRoot()) {
    LOG(ERROR) << "System wide profiling needs root privilege.";
    return false;
  }

  if (dump_symbols_ && can_dump_kernel_symbols_) {
    // No need to dump kernel symbols as we will dump all required symbols.
    can_dump_kernel_symbols_ = false;
  }
  if (clockid_.empty()) {
    clockid_ = IsSettingClockIdSupported() ? "monotonic" : "perf";
  }

  non_option_args->clear();
  for (; i < args.size(); ++i) {
    non_option_args->push_back(args[i]);
  }
  return true;
}

bool RecordCommand::AdjustPerfEventLimit() {
  bool set_prop = false;
  // 1. Adjust max_sample_rate.
  uint64_t cur_max_freq;
  if (GetMaxSampleFrequency(&cur_max_freq) && cur_max_freq < max_sample_freq_ &&
      !SetMaxSampleFrequency(max_sample_freq_)) {
    set_prop = true;
  }
  // 2. Adjust perf_cpu_time_max_percent.
  size_t cur_percent;
  if (GetCpuTimeMaxPercent(&cur_percent) && cur_percent != cpu_time_max_percent_ &&
      !SetCpuTimeMaxPercent(cpu_time_max_percent_)) {
    set_prop = true;
  }
  // 3. Adjust perf_event_mlock_kb.
  long cpus = sysconf(_SC_NPROCESSORS_CONF);
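  // Each event file mmaps (mmap_pages + 1) pages of 4K per cpu. For example, with 8 cpus and
  // the default 1024-page buffer, this asks for 8 * 1025 * 4 = 32800 KB of locked memory.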
  uint64_t mlock_kb = cpus * (mmap_page_range_.second + 1) * 4;
  if (event_selection_set_.HasAuxTrace()) {
    mlock_kb += cpus * aux_buffer_size_ / 1024;
  }
  uint64_t cur_mlock_kb;
  if (GetPerfEventMlockKb(&cur_mlock_kb) && cur_mlock_kb < mlock_kb &&
      !SetPerfEventMlockKb(mlock_kb)) {
    set_prop = true;
  }

  if (GetAndroidVersion() >= kAndroidVersionP + 1 && set_prop && !in_app_context_) {
    return SetPerfEventLimits(std::max(max_sample_freq_, cur_max_freq), cpu_time_max_percent_,
                              std::max(mlock_kb, cur_mlock_kb));
  }
  return true;
}

bool RecordCommand::TraceOffCpu() {
  if (FindEventTypeByName("sched:sched_switch") == nullptr) {
    LOG(ERROR) << "Can't trace off cpu because sched:sched_switch event is not available";
    return false;
  }
  for (auto& event_type : event_selection_set_.GetTracepointEvents()) {
    if (event_type->name == "sched:sched_switch") {
      LOG(ERROR) << "Trace offcpu can't be used together with sched:sched_switch event";
      return false;
    }
  }
  if (!IsDumpingRegsForTracepointEventsSupported()) {
    LOG(ERROR) << "Dumping regs for tracepoint events is not supported by the kernel";
    return false;
  }
  return event_selection_set_.AddEventType("sched:sched_switch");
}

bool RecordCommand::SetEventSelectionFlags() {
  event_selection_set_.SampleIdAll();
  if (!event_selection_set_.SetBranchSampling(branch_sampling_)) {
    return false;
  }
  if (fp_callchain_sampling_) {
    event_selection_set_.EnableFpCallChainSampling();
  } else if (dwarf_callchain_sampling_) {
    if (!event_selection_set_.EnableDwarfCallChainSampling(
            dump_stack_size_in_dwarf_sampling_)) {
      return false;
    }
  }
  event_selection_set_.SetInherit(child_inherit_);
  if (clockid_ != "perf") {
    event_selection_set_.SetClockId(clockid_map[clockid_]);
  }
  return true;
}

bool RecordCommand::CreateAndInitRecordFile() {
  record_file_writer_ = CreateRecordFile(record_filename_);
  if (record_file_writer_ == nullptr) {
    return false;
  }
  // Use first perf_event_attr and first event id to dump mmap and comm records.
  dumping_attr_id_ = event_selection_set_.GetEventAttrWithId()[0];
  return DumpKernelSymbol() && DumpTracingData() && DumpKernelMaps() && DumpUserSpaceMaps() &&
         DumpAuxTraceInfo();
}

std::unique_ptr<RecordFileWriter> RecordCommand::CreateRecordFile(
    const std::string& filename) {
  std::unique_ptr<RecordFileWriter> writer =
      RecordFileWriter::CreateInstance(filename);
  if (writer == nullptr) {
    return nullptr;
  }

  if (!writer->WriteAttrSection(event_selection_set_.GetEventAttrWithId())) {
    return nullptr;
  }
  return writer;
}

bool RecordCommand::DumpKernelSymbol() {
  if (can_dump_kernel_symbols_) {
    std::string kallsyms;
    if (event_selection_set_.NeedKernelSymbol() &&
        CheckKernelSymbolAddresses()) {
      if (!android::base::ReadFileToString("/proc/kallsyms", &kallsyms)) {
        PLOG(ERROR) << "failed to read /proc/kallsyms";
        return false;
      }
      KernelSymbolRecord r(kallsyms);
      if (!ProcessRecord(&r)) {
        return false;
      }
    }
  }
  return true;
}

bool RecordCommand::DumpTracingData() {
  std::vector<const EventType*> tracepoint_event_types =
      event_selection_set_.GetTracepointEvents();
  if (tracepoint_event_types.empty() || !CanRecordRawData() || in_app_context_) {
    return true;  // No need to dump tracing data, or can't do it.
  }
  std::vector<char> tracing_data;
  if (!GetTracingData(tracepoint_event_types, &tracing_data)) {
    return false;
  }
  TracingDataRecord record(tracing_data);
  if (!ProcessRecord(&record)) {
    return false;
  }
  return true;
}

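// Synthesizes MmapRecords for the kernel and loaded kernel modules, using the first event attr
// and id (dumping_attr_id_) so the records are attributed to a valid event.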
bool RecordCommand::DumpKernelMaps() {
  KernelMmap kernel_mmap;
  std::vector<KernelMmap> module_mmaps;
  GetKernelAndModuleMmaps(&kernel_mmap, &module_mmaps);

  MmapRecord mmap_record(*dumping_attr_id_.attr, true, UINT_MAX, 0, kernel_mmap.start_addr,
                         kernel_mmap.len, 0, kernel_mmap.filepath, dumping_attr_id_.ids[0]);
  if (!ProcessRecord(&mmap_record)) {
    return false;
  }
  for (auto& module_mmap : module_mmaps) {
    MmapRecord mmap_record(*dumping_attr_id_.attr, true, UINT_MAX, 0, module_mmap.start_addr,
                           module_mmap.len, 0, module_mmap.filepath, dumping_attr_id_.ids[0]);
    if (!ProcessRecord(&mmap_record)) {
      return false;
    }
  }
  return true;
}

bool RecordCommand::DumpUserSpaceMaps() {
  // For system-wide profiling:
  //   If there is no aux tracing, the maps of a process are dumped when needed (the first time
  //     a sample hits that process).
  //   If there is aux tracing, we don't know which maps will be needed, so dump all process maps.
  if (system_wide_collection_ && !event_selection_set_.HasAuxTrace()) {
    return true;
  }
  // Map from process id to a set of thread ids in that process.
  std::unordered_map<pid_t, std::unordered_set<pid_t>> process_map;
  if (system_wide_collection_) {
    for (auto pid : GetAllProcesses()) {
      process_map[pid] = std::unordered_set<pid_t>();
    }
  } else {
    for (pid_t pid : event_selection_set_.GetMonitoredProcesses()) {
      std::vector<pid_t> tids = GetThreadsInProcess(pid);
      process_map[pid].insert(tids.begin(), tids.end());
    }
    for (pid_t tid : event_selection_set_.GetMonitoredThreads()) {
      pid_t pid;
      if (GetProcessForThread(tid, &pid)) {
        process_map[pid].insert(tid);
      }
    }
  }

  // Dump each process.
  for (auto& pair : process_map) {
    if (!DumpProcessMaps(pair.first, pair.second)) {
      return false;
    }
  }
  return true;
}

bool RecordCommand::DumpProcessMaps(pid_t pid, const std::unordered_set<pid_t>& tids) {
  // Dump mmap records.
  std::vector<ThreadMmap> thread_mmaps;
  if (!GetThreadMmapsInProcess(pid, &thread_mmaps)) {
    // The process may exit before we get its info.
    return true;
  }
  const perf_event_attr& attr = *dumping_attr_id_.attr;
  uint64_t event_id = dumping_attr_id_.ids[0];
  for (const auto& map : thread_mmaps) {
    if (!(map.prot & PROT_EXEC) && !event_selection_set_.RecordNotExecutableMaps()) {
      continue;
    }
    Mmap2Record record(attr, false, pid, pid, map.start_addr, map.len,
                       map.pgoff, map.prot, map.name, event_id, last_record_timestamp_);
    if (!ProcessRecord(&record)) {
      return false;
    }
  }
  // Dump process name.
  std::string name = GetCompleteProcessName(pid);
  if (!name.empty()) {
    CommRecord record(attr, pid, pid, name, event_id, last_record_timestamp_);
    if (!ProcessRecord(&record)) {
      return false;
    }
  }
  // Dump thread info.
  for (const auto& tid : tids) {
    if (tid != pid && GetThreadName(tid, &name)) {
      CommRecord comm_record(attr, pid, tid, name, event_id, last_record_timestamp_);
      if (!ProcessRecord(&comm_record)) {
        return false;
      }
    }
  }
  return true;
}

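// Central sink for all records, both those read from the mapped buffers and those synthesized
// by the dump functions above. Updates bookkeeping (JIT info, last timestamp, on-demand map
// dumping), then routes the record to one of the Save*() paths based on the unwinding mode.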
bool RecordCommand::ProcessRecord(Record* record) {
  UpdateRecord(record);
  if (ShouldOmitRecord(record)) {
    return true;
  }
  if (size_limit_in_bytes_ > 0u) {
    if (size_limit_in_bytes_ < record_file_writer_->GetDataSectionSize()) {
      return event_selection_set_.GetIOEventLoop()->ExitLoop();
    }
  }
  if (jit_debug_reader_ && !jit_debug_reader_->UpdateRecord(record)) {
    return false;
  }
  last_record_timestamp_ = std::max(last_record_timestamp_, record->Timestamp());
  // In system wide recording, maps are dumped when they are needed by records.
  if (system_wide_collection_ && !DumpMapsForRecord(record)) {
    return false;
  }
  if (unwind_dwarf_callchain_) {
    if (post_unwind_) {
      return SaveRecordForPostUnwinding(record);
    }
    return SaveRecordAfterUnwinding(record);
  }
  return SaveRecordWithoutUnwinding(record);
}

bool RecordCommand::DumpAuxTraceInfo() {
  if (event_selection_set_.HasAuxTrace()) {
    AuxTraceInfoRecord auxtrace_info = ETMRecorder::GetInstance().CreateAuxTraceInfoRecord();
    return ProcessRecord(&auxtrace_info);
  }
  return true;
}

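// Returns true for user-space maps whose backing "file" exists only in memory (e.g. anonymous
// maps like [anon:dalvik-jit-code-cache]), which can't be used for symbolization afterwards.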
template <typename MmapRecordType>
bool MapOnlyExistInMemory(MmapRecordType* record) {
  return !record->InKernel() && MappedFileOnlyExistInMemory(record->filename);
}

bool RecordCommand::ShouldOmitRecord(Record* record) {
  if (jit_debug_reader_) {
    // To profile jitted Java code, we need PROT_JIT_SYMFILE_MAP maps not overlapped by maps for
    // [anon:dalvik-jit-code-cache]. To profile interpreted Java code, we record maps that
    // are not executable. Some non-exec maps (like those for stack, heap) provide misleading map
    // entries for unwinding, as in http://b/77236599. So it is better to remove
    // dalvik-jit-code-cache and other maps that only exist in memory.
    switch (record->type()) {
      case PERF_RECORD_MMAP:
        return MapOnlyExistInMemory(static_cast<MmapRecord*>(record));
      case PERF_RECORD_MMAP2:
        return MapOnlyExistInMemory(static_cast<Mmap2Record*>(record));
    }
  }
  return false;
}

bool RecordCommand::DumpMapsForRecord(Record* record) {
  if (record->type() == PERF_RECORD_SAMPLE) {
    pid_t pid = static_cast<SampleRecord*>(record)->tid_data.pid;
    if (dumped_processes_.find(pid) == dumped_processes_.end()) {
      // Dump map info and all thread names for that process.
      std::vector<pid_t> tids = GetThreadsInProcess(pid);
      if (!tids.empty() &&
          !DumpProcessMaps(pid, std::unordered_set<pid_t>(tids.begin(), tids.end()))) {
        return false;
      }
      dumped_processes_.insert(pid);
    }
  }
  return true;
}

bool RecordCommand::SaveRecordForPostUnwinding(Record* record) {
  if (!record_file_writer_->WriteRecord(*record)) {
    LOG(ERROR) << "If there isn't enough space for storing profiling data, consider using "
               << "--post-unwind=no option.";
    return false;
  }
  return true;
}

bool RecordCommand::SaveRecordAfterUnwinding(Record* record) {
  if (record->type() == PERF_RECORD_SAMPLE) {
    auto& r = *static_cast<SampleRecord*>(record);
    // AdjustCallChainGeneratedByKernel() should go before UnwindRecord(), because we don't want
    // to adjust callchains generated by the dwarf unwinder.
    r.AdjustCallChainGeneratedByKernel();
1337     if (!UnwindRecord(r)) {
1338       return false;
1339     }
1340     // ExcludeKernelCallChain() should go after UnwindRecord() to notice the generated user call
1341     // chain.
1342     if (r.InKernel() && exclude_kernel_callchain_ && !r.ExcludeKernelCallChain()) {
1343       // If current record contains no user callchain, skip it.
1344       return true;
1345     }
1346     sample_record_count_++;
1347   } else if (record->type() == PERF_RECORD_LOST) {
1348     lost_record_count_ += static_cast<LostRecord*>(record)->lost;
1349   } else {
1350     thread_tree_.Update(*record);
1351   }
1352   return record_file_writer_->WriteRecord(*record);
1353 }
1354 
bool RecordCommand::SaveRecordWithoutUnwinding(Record* record) {
  if (record->type() == PERF_RECORD_SAMPLE) {
    auto& r = *static_cast<SampleRecord*>(record);
    if (fp_callchain_sampling_ || dwarf_callchain_sampling_) {
      r.AdjustCallChainGeneratedByKernel();
    }
    if (r.InKernel() && exclude_kernel_callchain_ && !r.ExcludeKernelCallChain()) {
      // If the current record contains no user callchain, skip it.
      return true;
    }
    sample_record_count_++;
  } else if (record->type() == PERF_RECORD_LOST) {
    lost_record_count_ += static_cast<LostRecord*>(record)->lost;
  }
  return record_file_writer_->WriteRecord(*record);
}

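// Converts JIT/dex debug info into synthetic Mmap2Records (and dex file offsets), so samples
// falling in jitted code or in extracted dex files can be mapped back to their symbol files.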
bool RecordCommand::ProcessJITDebugInfo(const std::vector<JITDebugInfo>& debug_info,
                                        bool sync_kernel_records) {
  EventAttrWithId attr_id = event_selection_set_.GetEventAttrWithId()[0];
  for (auto& info : debug_info) {
    if (info.type == JITDebugInfo::JIT_DEBUG_JIT_CODE) {
      uint64_t timestamp = jit_debug_reader_->SyncWithRecords() ? info.timestamp
                                                                : last_record_timestamp_;
      Mmap2Record record(*attr_id.attr, false, info.pid, info.pid,
                         info.jit_code_addr, info.jit_code_len, 0, map_flags::PROT_JIT_SYMFILE_MAP,
                         info.file_path, attr_id.ids[0], timestamp);
      if (!ProcessRecord(&record)) {
        return false;
      }
    } else {
      if (info.extracted_dex_file_map) {
        ThreadMmap& map = *info.extracted_dex_file_map;
        uint64_t timestamp = jit_debug_reader_->SyncWithRecords() ? info.timestamp
                                                                  : last_record_timestamp_;
        Mmap2Record record(*attr_id.attr, false, info.pid, info.pid, map.start_addr, map.len,
                           map.pgoff, map.prot, map.name, attr_id.ids[0], timestamp);
        if (!ProcessRecord(&record)) {
          return false;
        }
      }
      thread_tree_.AddDexFileOffset(info.file_path, info.dex_file_offset);
    }
  }
  // We want to let samples see the most recent JIT maps generated before them, but no JIT maps
  // generated after them. So we flush the existing samples each time new JIT maps are generated,
  // and we prefer to process samples after processing JIT maps, because some of the samples may
  // hit the new JIT maps and we want to report them properly.
  if (sync_kernel_records && !event_selection_set_.SyncKernelBuffer()) {
    return false;
  }
  return true;
}

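// Reads one control command per line from stdin and answers on stdout. A hypothetical session
// (each command and reply is newline-terminated, assuming the SetEnableEvents() calls succeed):
//   stdin: pause   -> stdout: ok      (events disabled)
//   stdin: resume  -> stdout: ok      (events enabled)
//   stdin: foo     -> stdout: error   (unknown command)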
bool RecordCommand::ProcessControlCmd(IOEventLoop* loop) {
  char* line = nullptr;
  size_t line_length = 0;
  if (getline(&line, &line_length, stdin) == -1) {
    free(line);
    // When the simpleperf Java API destroys the simpleperf process, it also closes the stdin
    // pipe, so we may see EOF on stdin.
    return loop->ExitLoop();
  }
  std::string cmd = android::base::Trim(line);
  free(line);
  LOG(DEBUG) << "process control cmd: " << cmd;
  bool result = false;
  if (cmd == "pause") {
    result = event_selection_set_.SetEnableEvents(false);
  } else if (cmd == "resume") {
    result = event_selection_set_.SetEnableEvents(true);
  } else {
    LOG(ERROR) << "unknown control cmd: " << cmd;
  }
  printf("%s\n", result ? "ok" : "error");
  fflush(stdout);
  return result;
}

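// Rewrites the filename (and offset) of user-space mmap records so that libraries embedded in
// APKs and files extracted in memory get explicit names. A hypothetical rewrite, with
// illustrative paths and offsets only (here the embedded ELF starts at 0x100000 in the APK):
//   ["/data/app/base.apk" pgoff=0x103000]
//     -> ["/data/app/base.apk!/lib/arm64/libfoo.so" pgoff=0x3000]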
template <class RecordType>
void UpdateMmapRecordForEmbeddedPath(RecordType& r, bool has_prot, uint32_t prot) {
  if (r.InKernel()) {
    return;
  }
  std::string filename = r.filename;
  bool name_changed = false;
  // Some vdex files in map files are marked with the deleted flag, but they still exist in the
  // file system. It may be because a new file has replaced the old one, but the path is still
  // worth trying.
  if (android::base::EndsWith(filename, " (deleted)")) {
    filename.resize(filename.size() - 10);
    name_changed = true;
  }
  if (r.data->pgoff != 0 && (!has_prot || (prot & PROT_EXEC))) {
    // For the case of a shared library "foobar.so" embedded
    // inside an APK, we rewrite the original MMAP from
    // ["path.apk" offset=X] to ["path.apk!/foobar.so" offset=W]
    // so as to make the library name explicit. This update is
    // done here (as part of the record operation) as opposed to
    // on the host during the report, since we want to report
    // the correct library name even if the APK in question
    // is not present on the host. The new offset W is
    // calculated to be with respect to the start of foobar.so,
    // not to the start of path.apk.
    EmbeddedElf* ee = ApkInspector::FindElfInApkByOffset(filename, r.data->pgoff);
    if (ee != nullptr) {
      // Compute the new offset relative to the start of the ELF file in the APK.
      auto data = *r.data;
      data.pgoff -= ee->entry_offset();
      r.SetDataAndFilename(data, GetUrlInApk(filename, ee->entry_name()));
      return;
    }
  }
  std::string zip_path;
  std::string entry_name;
  if (ParseExtractedInMemoryPath(filename, &zip_path, &entry_name)) {
    filename = GetUrlInApk(zip_path, entry_name);
    name_changed = true;
  }
  if (name_changed) {
    auto data = *r.data;
    r.SetDataAndFilename(data, filename);
  }
}

void RecordCommand::UpdateRecord(Record* record) {
  if (record->type() == PERF_RECORD_MMAP) {
    UpdateMmapRecordForEmbeddedPath(*static_cast<MmapRecord*>(record), false, 0);
  } else if (record->type() == PERF_RECORD_MMAP2) {
    auto r = static_cast<Mmap2Record*>(record);
    UpdateMmapRecordForEmbeddedPath(*r, true, r->data->prot);
  } else if (record->type() == PERF_RECORD_COMM) {
    auto r = static_cast<CommRecord*>(record);
    if (r->data->pid == r->data->tid) {
      std::string s = GetCompleteProcessName(r->data->pid);
      if (!s.empty()) {
        r->SetCommandName(s);
      }
    }
  }
}

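// Offline-unwinds the user call chain of a sample, using the user registers and stack data
// dumped by the kernel. On success the register/stack data in the sample is replaced by the
// unwound call chain, which is also fed to the callchain joiner when enabled.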
bool RecordCommand::UnwindRecord(SampleRecord& r) {
  if ((r.sample_type & PERF_SAMPLE_CALLCHAIN) &&
      (r.sample_type & PERF_SAMPLE_REGS_USER) &&
      (r.regs_user_data.reg_mask != 0) &&
      (r.sample_type & PERF_SAMPLE_STACK_USER) &&
      (r.GetValidStackSize() > 0)) {
    ThreadEntry* thread =
        thread_tree_.FindThreadOrNew(r.tid_data.pid, r.tid_data.tid);
    RegSet regs(r.regs_user_data.abi, r.regs_user_data.reg_mask, r.regs_user_data.regs);
    std::vector<uint64_t> ips;
    std::vector<uint64_t> sps;
    if (!offline_unwinder_->UnwindCallChain(*thread, regs, r.stack_user_data.data,
                                            r.GetValidStackSize(), &ips, &sps)) {
      return false;
    }
    // The unwinding may fail if the JIT debug info isn't up to date. In this case, read the JIT
    // debug info from the process and retry unwinding.
    if (jit_debug_reader_ && !post_unwind_ &&
        offline_unwinder_->IsCallChainBrokenForIncompleteJITDebugInfo()) {
      jit_debug_reader_->ReadProcess(r.tid_data.pid);
      jit_debug_reader_->FlushDebugInfo(r.Timestamp());
      if (!offline_unwinder_->UnwindCallChain(*thread, regs, r.stack_user_data.data,
                                              r.GetValidStackSize(), &ips, &sps)) {
        return false;
      }
    }
    r.ReplaceRegAndStackWithCallChain(ips);
    if (callchain_joiner_) {
      return callchain_joiner_->AddCallChain(r.tid_data.pid, r.tid_data.tid,
                                             CallChainJoiner::ORIGINAL_OFFLINE, ips, sps);
    }
  }
  return true;
}

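// Second pass for post unwinding: move the raw records saved during recording to a temporary
// file, then re-read them and write the unwound records back to record_filename_.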
bool RecordCommand::PostUnwindRecords() {
  // 1. Move records from record_filename_ to a temporary file.
  if (!record_file_writer_->Close()) {
    return false;
  }
  record_file_writer_.reset();
  std::unique_ptr<TemporaryFile> tmp_file = ScopedTempFiles::CreateTempFile();
  if (!Workload::RunCmd({"mv", record_filename_, tmp_file->path})) {
    return false;
  }
  std::unique_ptr<RecordFileReader> reader = RecordFileReader::CreateInstance(tmp_file->path);
  if (!reader) {
    return false;
  }

  // 2. Read records from the temporary file, and write unwound records back to record_filename_.
  record_file_writer_ = CreateRecordFile(record_filename_);
  if (!record_file_writer_) {
    return false;
  }
  sample_record_count_ = 0;
  lost_record_count_ = 0;
  auto callback = [this](std::unique_ptr<Record> record) {
    return SaveRecordAfterUnwinding(record.get());
  };
  return reader->ReadDataSection(callback);
}

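// Rewrites sample records with the joined call chains produced by the callchain joiner,
// using the same move-to-a-temporary-file-and-rewrite scheme as PostUnwindRecords().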
bool RecordCommand::JoinCallChains() {
  // 1. Prepare joined callchains.
  if (!callchain_joiner_->JoinCallChains()) {
    return false;
  }
  // 2. Move records from record_filename_ to a temporary file.
  if (!record_file_writer_->Close()) {
    return false;
  }
  record_file_writer_.reset();
  std::unique_ptr<TemporaryFile> tmp_file = ScopedTempFiles::CreateTempFile();
  if (!Workload::RunCmd({"mv", record_filename_, tmp_file->path})) {
    return false;
  }

  // 3. Read records from the temporary file, and write records with joined call chains back
  // to record_filename_.
  std::unique_ptr<RecordFileReader> reader = RecordFileReader::CreateInstance(tmp_file->path);
  record_file_writer_ = CreateRecordFile(record_filename_);
  if (!reader || !record_file_writer_) {
    return false;
  }

  auto record_callback = [&](std::unique_ptr<Record> r) {
    if (r->type() != PERF_RECORD_SAMPLE) {
      return record_file_writer_->WriteRecord(*r);
    }
    SampleRecord& sr = *static_cast<SampleRecord*>(r.get());
    if (!sr.HasUserCallChain()) {
      return record_file_writer_->WriteRecord(sr);
    }
    pid_t pid;
    pid_t tid;
    CallChainJoiner::ChainType type;
    std::vector<uint64_t> ips;
    std::vector<uint64_t> sps;
    if (!callchain_joiner_->GetNextCallChain(pid, tid, type, ips, sps)) {
      return false;
    }
    CHECK_EQ(type, CallChainJoiner::JOINED_OFFLINE);
    CHECK_EQ(pid, static_cast<pid_t>(sr.tid_data.pid));
    CHECK_EQ(tid, static_cast<pid_t>(sr.tid_data.tid));
    sr.UpdateUserCallChain(ips);
    return record_file_writer_->WriteRecord(sr);
  };
  return reader->ReadDataSection(record_callback);
}

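// Writes the feature sections at the end of the record file: build ids, file symbols, OS
// release, arch, cmdline, meta info, and optionally branch stack and aux trace offsets.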
bool RecordCommand::DumpAdditionalFeatures(
    const std::vector<std::string>& args) {
  // Read data section of perf.data to collect hit file information.
  thread_tree_.ClearThreadAndMap();
  bool kernel_symbols_available = false;
  if (CheckKernelSymbolAddresses()) {
    Dso::ReadKernelSymbolsFromProc();
    kernel_symbols_available = true;
  }
  std::vector<uint64_t> auxtrace_offset;
  auto callback = [&](const Record* r) {
    thread_tree_.Update(*r);
    if (r->type() == PERF_RECORD_SAMPLE) {
      CollectHitFileInfo(*reinterpret_cast<const SampleRecord*>(r));
    } else if (r->type() == PERF_RECORD_AUXTRACE) {
      auto auxtrace = static_cast<const AuxTraceRecord*>(r);
      auxtrace_offset.emplace_back(auxtrace->location.file_offset - auxtrace->size());
    }
  };
  if (!record_file_writer_->ReadDataSection(callback)) {
    return false;
  }

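  // These six features are always written: build id, file, os release, arch, cmdline, and
  // meta info.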
  size_t feature_count = 6;
  if (branch_sampling_) {
    feature_count++;
  }
  if (!auxtrace_offset.empty()) {
    feature_count++;
  }
  if (!record_file_writer_->BeginWriteFeatures(feature_count)) {
    return false;
  }
  if (!DumpBuildIdFeature()) {
    return false;
  }
  if (!DumpFileFeature()) {
    return false;
  }
  utsname uname_buf;
  if (TEMP_FAILURE_RETRY(uname(&uname_buf)) != 0) {
    PLOG(ERROR) << "uname() failed";
    return false;
  }
  if (!record_file_writer_->WriteFeatureString(PerfFileFormat::FEAT_OSRELEASE,
                                               uname_buf.release)) {
    return false;
  }
  if (!record_file_writer_->WriteFeatureString(PerfFileFormat::FEAT_ARCH,
                                               uname_buf.machine)) {
    return false;
  }

  std::string exec_path = android::base::GetExecutablePath();
  if (exec_path.empty()) exec_path = "simpleperf";
  std::vector<std::string> cmdline;
  cmdline.push_back(exec_path);
  cmdline.push_back("record");
  cmdline.insert(cmdline.end(), args.begin(), args.end());
  if (!record_file_writer_->WriteCmdlineFeature(cmdline)) {
    return false;
  }
  if (branch_sampling_ != 0 &&
      !record_file_writer_->WriteBranchStackFeature()) {
    return false;
  }
  if (!DumpMetaInfoFeature(kernel_symbols_available)) {
    return false;
  }
  if (!auxtrace_offset.empty() && !record_file_writer_->WriteAuxTraceFeature(auxtrace_offset)) {
    return false;
  }

  if (!record_file_writer_->EndWriteFeatures()) {
    return false;
  }
  return true;
}

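// Dumps build ids for the binaries hit by samples (or, when aux tracing, for all binaries,
// since we don't know which binaries were traced).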
bool RecordCommand::DumpBuildIdFeature() {
  std::vector<BuildIdRecord> build_id_records;
  BuildId build_id;
  std::vector<Dso*> dso_v = thread_tree_.GetAllDsos();
  for (Dso* dso : dso_v) {
    // For aux tracing, we don't know which binaries are traced.
    // So dump build ids for all binaries.
    if (!dso->HasDumpId() && !event_selection_set_.HasAuxTrace()) {
      continue;
    }
    if (dso->type() == DSO_KERNEL) {
      if (!GetKernelBuildId(&build_id)) {
        continue;
      }
      build_id_records.push_back(
          BuildIdRecord(true, UINT_MAX, build_id, dso->Path()));
    } else if (dso->type() == DSO_KERNEL_MODULE) {
      std::string path = dso->Path();
      std::string module_name = basename(&path[0]);
      if (android::base::EndsWith(module_name, ".ko")) {
        module_name = module_name.substr(0, module_name.size() - 3);
      }
      if (!GetModuleBuildId(module_name, &build_id)) {
        LOG(DEBUG) << "can't read build_id for module " << module_name;
        continue;
      }
      build_id_records.push_back(BuildIdRecord(true, UINT_MAX, build_id, path));
    } else if (dso->type() == DSO_ELF_FILE) {
      if (dso->Path() == DEFAULT_EXECNAME_FOR_THREAD_MMAP) {
        continue;
      }
      if (!GetBuildIdFromDsoPath(dso->Path(), &build_id)) {
        LOG(DEBUG) << "Can't read build_id from file " << dso->Path();
        continue;
      }
      build_id_records.push_back(
          BuildIdRecord(false, UINT_MAX, build_id, dso->Path()));
    }
  }
  if (!record_file_writer_->WriteBuildIdFeature(build_id_records)) {
    return false;
  }
  return true;
}

bool RecordCommand::DumpFileFeature() {
  return record_file_writer_->WriteFileFeatures(thread_tree_.GetAllDsos());
}

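// Records environment info (simpleperf version, event types, device properties, clock id,
// timestamp, ...) so readers can interpret the profile consistently, even on another machine.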
bool RecordCommand::DumpMetaInfoFeature(bool kernel_symbols_available) {
  std::unordered_map<std::string, std::string> info_map;
  info_map["simpleperf_version"] = GetSimpleperfVersion();
  info_map["system_wide_collection"] = system_wide_collection_ ? "true" : "false";
  info_map["trace_offcpu"] = trace_offcpu_ ? "true" : "false";
  // By storing event type information in perf.data, readers of perf.data have the same
  // understanding of event types, even if they are on another machine.
  info_map["event_type_info"] = ScopedEventTypes::BuildString(event_selection_set_.GetEvents());
#if defined(__ANDROID__)
  info_map["product_props"] = android::base::StringPrintf("%s:%s:%s",
                                  android::base::GetProperty("ro.product.manufacturer", "").c_str(),
                                  android::base::GetProperty("ro.product.model", "").c_str(),
                                  android::base::GetProperty("ro.product.name", "").c_str());
  info_map["android_version"] = android::base::GetProperty("ro.build.version.release", "");
  if (!app_package_name_.empty()) {
    info_map["app_package_name"] = app_package_name_;
  }
#endif
  info_map["clockid"] = clockid_;
  info_map["timestamp"] = std::to_string(time(nullptr));
  info_map["kernel_symbols_available"] = kernel_symbols_available ? "true" : "false";
  return record_file_writer_->WriteMetaInfoFeature(info_map);
}

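// Marks the dsos (and, when symbol dumping is enabled, the symbols) hit by a sample and its
// call chain, so the file and build id features only cover binaries that were actually used.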
void RecordCommand::CollectHitFileInfo(const SampleRecord& r) {
  const ThreadEntry* thread =
      thread_tree_.FindThreadOrNew(r.tid_data.pid, r.tid_data.tid);
  const MapEntry* map =
      thread_tree_.FindMap(thread, r.ip_data.ip, r.InKernel());
  Dso* dso = map->dso;
  const Symbol* symbol;
  if (dump_symbols_) {
    symbol = thread_tree_.FindSymbol(map, r.ip_data.ip, nullptr, &dso);
    if (!symbol->HasDumpId()) {
      dso->CreateSymbolDumpId(symbol);
    }
  }
  if (!dso->HasDumpId() && dso->type() != DSO_UNKNOWN_FILE) {
    dso->CreateDumpId();
  }
  if (r.sample_type & PERF_SAMPLE_CALLCHAIN) {
    bool in_kernel = r.InKernel();
    bool first_ip = true;
    for (uint64_t i = 0; i < r.callchain_data.ip_nr; ++i) {
      uint64_t ip = r.callchain_data.ips[i];
      if (ip >= PERF_CONTEXT_MAX) {
        switch (ip) {
          case PERF_CONTEXT_KERNEL:
            in_kernel = true;
            break;
          case PERF_CONTEXT_USER:
            in_kernel = false;
            break;
          default:
            LOG(DEBUG) << "Unexpected perf_context in callchain: " << std::hex
                       << ip;
        }
      } else {
        if (first_ip) {
          first_ip = false;
          // Remove duplication with sample ip.
          if (ip == r.ip_data.ip) {
            continue;
          }
        }
        map = thread_tree_.FindMap(thread, ip, in_kernel);
        dso = map->dso;
        if (dump_symbols_) {
          symbol = thread_tree_.FindSymbol(map, ip, nullptr, &dso);
          if (!symbol->HasDumpId()) {
            dso->CreateSymbolDumpId(symbol);
          }
        }
        if (!dso->HasDumpId() && dso->type() != DSO_UNKNOWN_FILE) {
          dso->CreateDumpId();
        }
      }
    }
  }
}

void RegisterRecordCommand() {
  RegisterCommand("record",
                  [] { return std::unique_ptr<Command>(new RecordCommand()); });
}