1 /* 2 * Copyright (C) 2015 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #include <inttypes.h> 18 #include <libgen.h> 19 #include <signal.h> 20 #include <sys/mman.h> 21 #include <sys/prctl.h> 22 #include <sys/utsname.h> 23 #include <time.h> 24 #include <unistd.h> 25 #include <set> 26 #include <string> 27 #include <unordered_map> 28 #include <unordered_set> 29 #include <vector> 30 31 #include <android-base/logging.h> 32 #include <android-base/file.h> 33 #include <android-base/parseint.h> 34 #include <android-base/strings.h> 35 #include <android-base/unique_fd.h> 36 #if defined(__ANDROID__) 37 #include <android-base/properties.h> 38 #endif 39 40 #include "CallChainJoiner.h" 41 #include "command.h" 42 #include "environment.h" 43 #include "ETMRecorder.h" 44 #include "event_selection_set.h" 45 #include "event_type.h" 46 #include "IOEventLoop.h" 47 #include "JITDebugReader.h" 48 #include "OfflineUnwinder.h" 49 #include "read_apk.h" 50 #include "read_elf.h" 51 #include "record.h" 52 #include "record_file.h" 53 #include "thread_tree.h" 54 #include "tracing.h" 55 #include "utils.h" 56 #include "workload.h" 57 58 using namespace simpleperf; 59 60 static std::string default_measured_event_type = "cpu-cycles"; 61 62 static std::unordered_map<std::string, uint64_t> branch_sampling_type_map = { 63 {"u", PERF_SAMPLE_BRANCH_USER}, 64 {"k", PERF_SAMPLE_BRANCH_KERNEL}, 65 {"any", PERF_SAMPLE_BRANCH_ANY}, 66 {"any_call", PERF_SAMPLE_BRANCH_ANY_CALL}, 67 {"any_ret", PERF_SAMPLE_BRANCH_ANY_RETURN}, 68 {"ind_call", PERF_SAMPLE_BRANCH_IND_CALL}, 69 }; 70 71 static std::unordered_map<std::string, int> clockid_map = { 72 {"realtime", CLOCK_REALTIME}, 73 {"monotonic", CLOCK_MONOTONIC}, 74 {"monotonic_raw", CLOCK_MONOTONIC_RAW}, 75 {"boottime", CLOCK_BOOTTIME}, 76 }; 77 78 // The max size of records dumped by kernel is 65535, and dump stack size 79 // should be a multiply of 8, so MAX_DUMP_STACK_SIZE is 65528. 80 constexpr uint32_t MAX_DUMP_STACK_SIZE = 65528; 81 82 // The max allowed pages in mapped buffer is decided by rlimit(RLIMIT_MEMLOCK). 83 // Here 1024 is a desired value for pages in mapped buffer. If mapped 84 // successfully, the buffer size = 1024 * 4K (page size) = 4M. 85 constexpr size_t DESIRED_PAGES_IN_MAPPED_BUFFER = 1024; 86 87 // Cache size used by CallChainJoiner to cache call chains in memory. 88 constexpr size_t DEFAULT_CALL_CHAIN_JOINER_CACHE_SIZE = 8 * 1024 * 1024; 89 90 // Currently, the record buffer size in user-space is set to match the kernel buffer size on a 91 // 8 core system. For system-wide recording, it is 8K pages * 4K page_size * 8 cores = 256MB. 92 // For non system-wide recording, it is 1K pages * 4K page_size * 8 cores = 64MB. 93 static constexpr size_t kRecordBufferSize = 64 * 1024 * 1024; 94 static constexpr size_t kSystemWideRecordBufferSize = 256 * 1024 * 1024; 95 96 static constexpr size_t kDefaultAuxBufferSize = 4 * 1024 * 1024; 97 98 // On Pixel 3, it takes about 1ms to enable ETM, and 16-40ms to disable ETM and copy 4M ETM data. 99 // So make default period to 100ms. 100 static constexpr double kDefaultEtmDataFlushPeriodInSec = 0.1; 101 102 struct TimeStat { 103 uint64_t prepare_recording_time = 0; 104 uint64_t start_recording_time = 0; 105 uint64_t stop_recording_time = 0; 106 uint64_t finish_recording_time = 0; 107 uint64_t post_process_time = 0; 108 }; 109 110 class RecordCommand : public Command { 111 public: 112 RecordCommand() 113 : Command( 114 "record", "record sampling info in perf.data", 115 // clang-format off 116 "Usage: simpleperf record [options] [--] [command [command-args]]\n" 117 " Gather sampling information of running [command]. And -a/-p/-t option\n" 118 " can be used to change target of sampling information.\n" 119 " The default options are: -e cpu-cycles -f 4000 -o perf.data.\n" 120 "Select monitored threads:\n" 121 "-a System-wide collection. Use with --exclude-perf to exclude samples for\n" 122 " simpleperf process.\n" 123 #if defined(__ANDROID__) 124 "--app package_name Profile the process of an Android application.\n" 125 " On non-rooted devices, the app must be debuggable,\n" 126 " because we use run-as to switch to the app's context.\n" 127 #endif 128 "-p pid1,pid2,... Record events on existing processes. Mutually exclusive\n" 129 " with -a.\n" 130 "-t tid1,tid2,... Record events on existing threads. Mutually exclusive with -a.\n" 131 "--exclude-perf Exclude samples for simpleperf process.\n" 132 "\n" 133 "Select monitored event types:\n" 134 "-e event1[:modifier1],event2[:modifier2],...\n" 135 " Select a list of events to record. An event can be:\n" 136 " 1) an event name listed in `simpleperf list`;\n" 137 " 2) a raw PMU event in rN format. N is a hex number.\n" 138 " For example, r1b selects event number 0x1b.\n" 139 " Modifiers can be added to define how the event should be\n" 140 " monitored. Possible modifiers are:\n" 141 " u - monitor user space events only\n" 142 " k - monitor kernel space events only\n" 143 "--group event1[:modifier],event2[:modifier2],...\n" 144 " Similar to -e option. But events specified in the same --group\n" 145 " option are monitored as a group, and scheduled in and out at the\n" 146 " same time.\n" 147 "--trace-offcpu Generate samples when threads are scheduled off cpu.\n" 148 " Similar to \"-c 1 -e sched:sched_switch\".\n" 149 "\n" 150 "Select monitoring options:\n" 151 "-f freq Set event sample frequency. It means recording at most [freq]\n" 152 " samples every second. For non-tracepoint events, the default\n" 153 " option is -f 4000. A -f/-c option affects all event types\n" 154 " following it until meeting another -f/-c option. For example,\n" 155 " for \"-f 1000 cpu-cycles -c 1 -e sched:sched_switch\", cpu-cycles\n" 156 " has sample freq 1000, sched:sched_switch event has sample period 1.\n" 157 "-c count Set event sample period. It means recording one sample when\n" 158 " [count] events happen. For tracepoint events, the default option\n" 159 " is -c 1.\n" 160 "--call-graph fp | dwarf[,<dump_stack_size>]\n" 161 " Enable call graph recording. Use frame pointer or dwarf debug\n" 162 " frame as the method to parse call graph in stack.\n" 163 " Default is dwarf,65528.\n" 164 "-g Same as '--call-graph dwarf'.\n" 165 "--clockid clock_id Generate timestamps of samples using selected clock.\n" 166 " Possible values are: realtime, monotonic,\n" 167 " monotonic_raw, boottime, perf. If supported, default\n" 168 " is monotonic, otherwise is perf.\n" 169 "--cpu cpu_item1,cpu_item2,...\n" 170 " Collect samples only on the selected cpus. cpu_item can be cpu\n" 171 " number like 1, or cpu range like 0-3.\n" 172 "--duration time_in_sec Monitor for time_in_sec seconds instead of running\n" 173 " [command]. Here time_in_sec may be any positive\n" 174 " floating point number.\n" 175 "-j branch_filter1,branch_filter2,...\n" 176 " Enable taken branch stack sampling. Each sample captures a series\n" 177 " of consecutive taken branches.\n" 178 " The following filters are defined:\n" 179 " any: any type of branch\n" 180 " any_call: any function call or system call\n" 181 " any_ret: any function return or system call return\n" 182 " ind_call: any indirect branch\n" 183 " u: only when the branch target is at the user level\n" 184 " k: only when the branch target is in the kernel\n" 185 " This option requires at least one branch type among any, any_call,\n" 186 " any_ret, ind_call.\n" 187 "-b Enable taken branch stack sampling. Same as '-j any'.\n" 188 "-m mmap_pages Set the size of the buffer used to receiving sample data from\n" 189 " the kernel. It should be a power of 2. If not set, the max\n" 190 " possible value <= 1024 will be used.\n" 191 "--aux-buffer-size <buffer_size> Set aux buffer size, only used in cs-etm event type.\n" 192 " Need to be power of 2 and page size aligned.\n" 193 " Used memory size is (buffer_size * (cpu_count + 1).\n" 194 " Default is 4M.\n" 195 "--no-inherit Don't record created child threads/processes.\n" 196 "--cpu-percent <percent> Set the max percent of cpu time used for recording.\n" 197 " percent is in range [1-100], default is 25.\n" 198 "--include-filter binary1,binary2,...\n" 199 " Trace only selected binaries in cs-etm instruction tracing.\n" 200 " Each entry is a binary path.\n" 201 "\n" 202 "Dwarf unwinding options:\n" 203 "--post-unwind=(yes|no) If `--call-graph dwarf` option is used, then the user's\n" 204 " stack will be recorded in perf.data and unwound while\n" 205 " recording by default. Use --post-unwind=yes to switch\n" 206 " to unwind after recording.\n" 207 "--no-unwind If `--call-graph dwarf` option is used, then the user's stack\n" 208 " will be unwound by default. Use this option to disable the\n" 209 " unwinding of the user's stack.\n" 210 "--no-callchain-joiner If `--call-graph dwarf` option is used, then by default\n" 211 " callchain joiner is used to break the 64k stack limit\n" 212 " and build more complete call graphs. However, the built\n" 213 " call graphs may not be correct in all cases.\n" 214 "--callchain-joiner-min-matching-nodes count\n" 215 " When callchain joiner is used, set the matched nodes needed to join\n" 216 " callchains. The count should be >= 1. By default it is 1.\n" 217 "--no-cut-samples Simpleperf uses a record buffer to cache records received from the kernel.\n" 218 " When the available space in the buffer reaches low level, it cuts part of\n" 219 " the stack data in samples. When the available space reaches critical level,\n" 220 " it drops all samples. This option makes simpleperf not cut samples when the\n" 221 " available space reaches low level.\n" 222 "\n" 223 "Recording file options:\n" 224 "--no-dump-kernel-symbols Don't dump kernel symbols in perf.data. By default\n" 225 " kernel symbols will be dumped when needed.\n" 226 "--no-dump-symbols Don't dump symbols in perf.data. By default symbols are\n" 227 " dumped in perf.data, to support reporting in another\n" 228 " environment.\n" 229 "-o record_file_name Set record file name, default is perf.data.\n" 230 "--size-limit SIZE[K|M|G] Stop recording after SIZE bytes of records.\n" 231 " Default is unlimited.\n" 232 "--symfs <dir> Look for files with symbols relative to this directory.\n" 233 " This option is used to provide files with symbol table and\n" 234 " debug information, which are used for unwinding and dumping symbols.\n" 235 "\n" 236 "Other options:\n" 237 "--exit-with-parent Stop recording when the process starting\n" 238 " simpleperf dies.\n" 239 "--start_profiling_fd fd_no After starting profiling, write \"STARTED\" to\n" 240 " <fd_no>, then close <fd_no>.\n" 241 "--stdio-controls-profiling Use stdin/stdout to pause/resume profiling.\n" 242 #if defined(__ANDROID__) 243 "--in-app We are already running in the app's context.\n" 244 "--tracepoint-events file_name Read tracepoint events from [file_name] instead of tracefs.\n" 245 #endif 246 #if 0 247 // Below options are only used internally and shouldn't be visible to the public. 248 "--out-fd <fd> Write perf.data to a file descriptor.\n" 249 "--stop-signal-fd <fd> Stop recording when fd is readable.\n" 250 #endif 251 // clang-format on 252 ), 253 system_wide_collection_(false), 254 branch_sampling_(0), 255 fp_callchain_sampling_(false), 256 dwarf_callchain_sampling_(false), 257 dump_stack_size_in_dwarf_sampling_(MAX_DUMP_STACK_SIZE), 258 unwind_dwarf_callchain_(true), 259 post_unwind_(false), 260 child_inherit_(true), 261 duration_in_sec_(0), 262 can_dump_kernel_symbols_(true), 263 dump_symbols_(true), 264 event_selection_set_(false), 265 mmap_page_range_(std::make_pair(1, DESIRED_PAGES_IN_MAPPED_BUFFER)), 266 record_filename_("perf.data"), 267 sample_record_count_(0), 268 lost_record_count_(0), 269 in_app_context_(false), 270 trace_offcpu_(false), 271 exclude_kernel_callchain_(false), 272 allow_callchain_joiner_(true), 273 callchain_joiner_min_matching_nodes_(1u), 274 last_record_timestamp_(0u) { 275 // If we run `adb shell simpleperf record xxx` and stop profiling by ctrl-c, adb closes 276 // sockets connecting simpleperf. After that, simpleperf will receive SIGPIPE when writing 277 // to stdout/stderr, which is a problem when we use '--app' option. So ignore SIGPIPE to 278 // finish properly. 279 signal(SIGPIPE, SIG_IGN); 280 } 281 282 bool Run(const std::vector<std::string>& args); 283 284 private: 285 bool ParseOptions(const std::vector<std::string>& args, 286 std::vector<std::string>* non_option_args); 287 bool AdjustPerfEventLimit(); 288 bool PrepareRecording(Workload* workload); 289 bool DoRecording(Workload* workload); 290 bool PostProcessRecording(const std::vector<std::string>& args); 291 bool TraceOffCpu(); 292 bool SetEventSelectionFlags(); 293 bool CreateAndInitRecordFile(); 294 std::unique_ptr<RecordFileWriter> CreateRecordFile( 295 const std::string& filename); 296 bool DumpKernelSymbol(); 297 bool DumpTracingData(); 298 bool DumpKernelMaps(); 299 bool DumpUserSpaceMaps(); 300 bool DumpProcessMaps(pid_t pid, const std::unordered_set<pid_t>& tids); 301 bool DumpAuxTraceInfo(); 302 bool ProcessRecord(Record* record); 303 bool ShouldOmitRecord(Record* record); 304 bool DumpMapsForRecord(Record* record); 305 bool SaveRecordForPostUnwinding(Record* record); 306 bool SaveRecordAfterUnwinding(Record* record); 307 bool SaveRecordWithoutUnwinding(Record* record); 308 bool ProcessJITDebugInfo(const std::vector<JITDebugInfo>& debug_info, bool sync_kernel_records); 309 bool ProcessControlCmd(IOEventLoop* loop); 310 311 void UpdateRecord(Record* record); 312 bool UnwindRecord(SampleRecord& r); 313 bool PostUnwindRecords(); 314 bool JoinCallChains(); 315 bool DumpAdditionalFeatures(const std::vector<std::string>& args); 316 bool DumpBuildIdFeature(); 317 bool DumpFileFeature(); 318 bool DumpMetaInfoFeature(bool kernel_symbols_available); 319 void CollectHitFileInfo(const SampleRecord& r); 320 321 std::unique_ptr<SampleSpeed> sample_speed_; 322 bool system_wide_collection_; 323 uint64_t branch_sampling_; 324 bool fp_callchain_sampling_; 325 bool dwarf_callchain_sampling_; 326 uint32_t dump_stack_size_in_dwarf_sampling_; 327 bool unwind_dwarf_callchain_; 328 bool post_unwind_; 329 std::unique_ptr<OfflineUnwinder> offline_unwinder_; 330 bool child_inherit_; 331 double duration_in_sec_; 332 bool can_dump_kernel_symbols_; 333 bool dump_symbols_; 334 std::string clockid_; 335 std::vector<int> cpus_; 336 EventSelectionSet event_selection_set_; 337 338 std::pair<size_t, size_t> mmap_page_range_; 339 size_t aux_buffer_size_ = kDefaultAuxBufferSize; 340 341 ThreadTree thread_tree_; 342 std::string record_filename_; 343 android::base::unique_fd out_fd_; 344 std::unique_ptr<RecordFileWriter> record_file_writer_; 345 android::base::unique_fd stop_signal_fd_; 346 347 uint64_t sample_record_count_; 348 uint64_t lost_record_count_; 349 android::base::unique_fd start_profiling_fd_; 350 bool stdio_controls_profiling_ = false; 351 352 std::string app_package_name_; 353 bool in_app_context_; 354 bool trace_offcpu_; 355 bool exclude_kernel_callchain_; 356 uint64_t size_limit_in_bytes_ = 0; 357 uint64_t max_sample_freq_ = DEFAULT_SAMPLE_FREQ_FOR_NONTRACEPOINT_EVENT; 358 size_t cpu_time_max_percent_ = 25; 359 360 // For CallChainJoiner 361 bool allow_callchain_joiner_; 362 size_t callchain_joiner_min_matching_nodes_; 363 std::unique_ptr<CallChainJoiner> callchain_joiner_; 364 bool allow_cutting_samples_ = true; 365 366 std::unique_ptr<JITDebugReader> jit_debug_reader_; 367 uint64_t last_record_timestamp_; // used to insert Mmap2Records for JIT debug info 368 TimeStat time_stat_; 369 EventAttrWithId dumping_attr_id_; 370 // In system wide recording, record if we have dumped map info for a process. 371 std::unordered_set<pid_t> dumped_processes_; 372 bool exclude_perf_ = false; 373 }; 374 375 bool RecordCommand::Run(const std::vector<std::string>& args) { 376 ScopedCurrentArch scoped_arch(GetMachineArch()); 377 if (!CheckPerfEventLimit()) { 378 return false; 379 } 380 AllowMoreOpenedFiles(); 381 382 std::vector<std::string> workload_args; 383 if (!ParseOptions(args, &workload_args)) { 384 return false; 385 } 386 if (!AdjustPerfEventLimit()) { 387 return false; 388 } 389 ScopedTempFiles scoped_temp_files(android::base::Dirname(record_filename_)); 390 if (!app_package_name_.empty() && !in_app_context_) { 391 // Some users want to profile non debuggable apps on rooted devices. If we use run-as, 392 // it will be impossible when using --app. So don't switch to app's context when we are 393 // root. 394 if (!IsRoot()) { 395 return RunInAppContext(app_package_name_, "record", args, workload_args.size(), 396 record_filename_, true); 397 } 398 } 399 std::unique_ptr<Workload> workload; 400 if (!workload_args.empty()) { 401 workload = Workload::CreateWorkload(workload_args); 402 if (workload == nullptr) { 403 return false; 404 } 405 } 406 time_stat_.prepare_recording_time = GetSystemClock(); 407 if (!PrepareRecording(workload.get())) { 408 return false; 409 } 410 time_stat_.start_recording_time = GetSystemClock(); 411 if (!DoRecording(workload.get())) { 412 return false; 413 } 414 return PostProcessRecording(args); 415 } 416 417 bool RecordCommand::PrepareRecording(Workload* workload) { 418 // 1. Prepare in other modules. 419 PrepareVdsoFile(); 420 421 // 2. Add default event type. 422 if (event_selection_set_.empty()) { 423 size_t group_id; 424 if (!event_selection_set_.AddEventType(default_measured_event_type, &group_id)) { 425 return false; 426 } 427 if (sample_speed_) { 428 event_selection_set_.SetSampleSpeed(group_id, *sample_speed_); 429 } 430 } 431 432 // 3. Process options before opening perf event files. 433 exclude_kernel_callchain_ = event_selection_set_.ExcludeKernel(); 434 if (trace_offcpu_ && !TraceOffCpu()) { 435 return false; 436 } 437 if (!SetEventSelectionFlags()) { 438 return false; 439 } 440 if (unwind_dwarf_callchain_) { 441 offline_unwinder_ = OfflineUnwinder::Create(false); 442 } 443 if (unwind_dwarf_callchain_ && allow_callchain_joiner_) { 444 callchain_joiner_.reset(new CallChainJoiner(DEFAULT_CALL_CHAIN_JOINER_CACHE_SIZE, 445 callchain_joiner_min_matching_nodes_, 446 false)); 447 } 448 449 // 4. Add monitored targets. 450 bool need_to_check_targets = false; 451 if (system_wide_collection_) { 452 event_selection_set_.AddMonitoredThreads({-1}); 453 } else if (!event_selection_set_.HasMonitoredTarget()) { 454 if (workload != nullptr) { 455 event_selection_set_.AddMonitoredProcesses({workload->GetPid()}); 456 event_selection_set_.SetEnableOnExec(true); 457 } else if (!app_package_name_.empty()) { 458 // If app process is not created, wait for it. This allows simpleperf starts before 459 // app process. In this way, we can have a better support of app start-up time profiling. 460 std::set<pid_t> pids = WaitForAppProcesses(app_package_name_); 461 event_selection_set_.AddMonitoredProcesses(pids); 462 need_to_check_targets = true; 463 } else { 464 LOG(ERROR) 465 << "No threads to monitor. Try `simpleperf help record` for help"; 466 return false; 467 } 468 } else { 469 need_to_check_targets = true; 470 } 471 // Profiling JITed/interpreted Java code is supported starting from Android P. 472 // Also support profiling art interpreter on host. 473 if (GetAndroidVersion() >= kAndroidVersionP || GetAndroidVersion() == 0) { 474 // JIT symfiles are stored in temporary files, and are deleted after recording. But if 475 // `-g --no-unwind` option is used, we want to keep symfiles to support unwinding in 476 // the debug-unwind cmd. 477 bool keep_symfiles = dwarf_callchain_sampling_ && !unwind_dwarf_callchain_; 478 bool sync_with_records = clockid_ == "monotonic"; 479 jit_debug_reader_.reset(new JITDebugReader(keep_symfiles, sync_with_records)); 480 // To profile java code, need to dump maps containing vdex files, which are not executable. 481 event_selection_set_.SetRecordNotExecutableMaps(true); 482 } 483 484 // 5. Open perf event files and create mapped buffers. 485 if (!event_selection_set_.OpenEventFiles(cpus_)) { 486 return false; 487 } 488 size_t record_buffer_size = system_wide_collection_ ? kSystemWideRecordBufferSize 489 : kRecordBufferSize; 490 if (!event_selection_set_.MmapEventFiles(mmap_page_range_.first, mmap_page_range_.second, 491 aux_buffer_size_, record_buffer_size, 492 allow_cutting_samples_, exclude_perf_)) { 493 return false; 494 } 495 auto callback = 496 std::bind(&RecordCommand::ProcessRecord, this, std::placeholders::_1); 497 if (!event_selection_set_.PrepareToReadMmapEventData(callback)) { 498 return false; 499 } 500 501 // 6. Create perf.data. 502 if (!CreateAndInitRecordFile()) { 503 return false; 504 } 505 506 // 7. Add read/signal/periodic Events. 507 if (need_to_check_targets && !event_selection_set_.StopWhenNoMoreTargets()) { 508 return false; 509 } 510 IOEventLoop* loop = event_selection_set_.GetIOEventLoop(); 511 auto exit_loop_callback = [loop]() { 512 return loop->ExitLoop(); 513 }; 514 if (!loop->AddSignalEvents({SIGCHLD, SIGINT, SIGTERM}, exit_loop_callback)) { 515 return false; 516 } 517 518 // Only add an event for SIGHUP if we didn't inherit SIG_IGN (e.g. from nohup). 519 if (!SignalIsIgnored(SIGHUP)) { 520 if (!loop->AddSignalEvent(SIGHUP, exit_loop_callback)) { 521 return false; 522 } 523 } 524 if (stop_signal_fd_ != -1) { 525 if (!loop->AddReadEvent(stop_signal_fd_, exit_loop_callback)) { 526 return false; 527 } 528 } 529 530 if (duration_in_sec_ != 0) { 531 if (!loop->AddPeriodicEvent(SecondToTimeval(duration_in_sec_), 532 [loop]() { return loop->ExitLoop(); })) { 533 return false; 534 } 535 } 536 if (stdio_controls_profiling_) { 537 if (!loop->AddReadEvent(0, [this, loop]() { return ProcessControlCmd(loop); })) { 538 return false; 539 } 540 } 541 if (jit_debug_reader_) { 542 auto callback = [this](const std::vector<JITDebugInfo>& debug_info, bool sync_kernel_records) { 543 return ProcessJITDebugInfo(debug_info, sync_kernel_records); 544 }; 545 if (!jit_debug_reader_->RegisterDebugInfoCallback(loop, callback)) { 546 return false; 547 } 548 if (!system_wide_collection_) { 549 std::set<pid_t> pids = event_selection_set_.GetMonitoredProcesses(); 550 for (pid_t tid : event_selection_set_.GetMonitoredThreads()) { 551 pid_t pid; 552 if (GetProcessForThread(tid, &pid)) { 553 pids.insert(pid); 554 } 555 } 556 for (pid_t pid : pids) { 557 if (!jit_debug_reader_->MonitorProcess(pid)) { 558 return false; 559 } 560 } 561 if (!jit_debug_reader_->ReadAllProcesses()) { 562 return false; 563 } 564 } 565 } 566 if (event_selection_set_.HasAuxTrace()) { 567 // ETM data is dumped to kernel buffer only when there is no thread traced by ETM. It happens 568 // either when all monitored threads are scheduled off cpu, or when all etm perf events are 569 // disabled. 570 // If ETM data isn't dumped to kernel buffer in time, overflow parts will be dropped. This 571 // makes less than expected data, especially in system wide recording. So add a periodic event 572 // to flush etm data by temporarily disable all perf events. 573 auto etm_flush = [this]() { 574 return event_selection_set_.SetEnableEvents(false) && 575 event_selection_set_.SetEnableEvents(true); 576 }; 577 if (!loop->AddPeriodicEvent(SecondToTimeval(kDefaultEtmDataFlushPeriodInSec), etm_flush)) { 578 return false; 579 } 580 } 581 return true; 582 } 583 584 bool RecordCommand::DoRecording(Workload* workload) { 585 // Write records in mapped buffers of perf_event_files to output file while workload is running. 586 if (workload != nullptr && !workload->IsStarted() && !workload->Start()) { 587 return false; 588 } 589 if (start_profiling_fd_.get() != -1) { 590 if (!android::base::WriteStringToFd("STARTED", start_profiling_fd_)) { 591 PLOG(ERROR) << "failed to write to start_profiling_fd_"; 592 } 593 start_profiling_fd_.reset(); 594 } 595 if (stdio_controls_profiling_) { 596 printf("started\n"); 597 fflush(stdout); 598 } 599 if (!event_selection_set_.GetIOEventLoop()->RunLoop()) { 600 return false; 601 } 602 time_stat_.stop_recording_time = GetSystemClock(); 603 if (!event_selection_set_.FinishReadMmapEventData()) { 604 return false; 605 } 606 time_stat_.finish_recording_time = GetSystemClock(); 607 return true; 608 } 609 610 static bool WriteRecordDataToOutFd(const std::string& in_filename, android::base::unique_fd out_fd) { 611 android::base::unique_fd in_fd(FileHelper::OpenReadOnly(in_filename)); 612 if (in_fd == -1) { 613 PLOG(ERROR) << "Failed to open " << in_filename; 614 return false; 615 } 616 char buf[8192]; 617 while (true) { 618 ssize_t n = TEMP_FAILURE_RETRY(read(in_fd, buf, sizeof(buf))); 619 if (n < 0) { 620 PLOG(ERROR) << "Failed to read " << in_filename; 621 return false; 622 } 623 if (n == 0) { 624 break; 625 } 626 if (!android::base::WriteFully(out_fd, buf, n)) { 627 PLOG(ERROR) << "Failed to write to out_fd"; 628 return false; 629 } 630 } 631 unlink(in_filename.c_str()); 632 return true; 633 } 634 635 bool RecordCommand::PostProcessRecording(const std::vector<std::string>& args) { 636 // 1. Post unwind dwarf callchain. 637 if (unwind_dwarf_callchain_ && post_unwind_) { 638 if (!PostUnwindRecords()) { 639 return false; 640 } 641 } 642 643 // 2. Optionally join Callchains. 644 if (callchain_joiner_) { 645 JoinCallChains(); 646 } 647 648 // 3. Dump additional features, and close record file. 649 if (!DumpAdditionalFeatures(args)) { 650 return false; 651 } 652 if (!record_file_writer_->Close()) { 653 return false; 654 } 655 if (out_fd_ != -1 && !WriteRecordDataToOutFd(record_filename_, std::move(out_fd_))) { 656 return false; 657 } 658 time_stat_.post_process_time = GetSystemClock(); 659 660 // 4. Show brief record result. 661 auto record_stat = event_selection_set_.GetRecordStat(); 662 if (event_selection_set_.HasAuxTrace()) { 663 LOG(INFO) << "Aux data traced: " << record_stat.aux_data_size; 664 if (record_stat.lost_aux_data_size != 0) { 665 LOG(INFO) << "Aux data lost in user space: " << record_stat.lost_aux_data_size; 666 } 667 } else { 668 std::string cut_samples; 669 if (record_stat.cut_stack_samples > 0) { 670 cut_samples = android::base::StringPrintf(" (cut %zu)", record_stat.cut_stack_samples); 671 } 672 lost_record_count_ += record_stat.lost_samples + record_stat.lost_non_samples; 673 LOG(INFO) << "Samples recorded: " << sample_record_count_ << cut_samples 674 << ". Samples lost: " << lost_record_count_ << "."; 675 LOG(DEBUG) << "In user space, dropped " << record_stat.lost_samples << " samples, " 676 << record_stat.lost_non_samples << " non samples, cut stack of " 677 << record_stat.cut_stack_samples << " samples."; 678 if (sample_record_count_ + lost_record_count_ != 0) { 679 double lost_percent = 680 static_cast<double>(lost_record_count_) / (lost_record_count_ + sample_record_count_); 681 constexpr double LOST_PERCENT_WARNING_BAR = 0.1; 682 if (lost_percent >= LOST_PERCENT_WARNING_BAR) { 683 LOG(WARNING) << "Lost " << (lost_percent * 100) << "% of samples, " 684 << "consider increasing mmap_pages(-m), " 685 << "or decreasing sample frequency(-f), " 686 << "or increasing sample period(-c)."; 687 } 688 } 689 if (callchain_joiner_) { 690 callchain_joiner_->DumpStat(); 691 } 692 } 693 LOG(DEBUG) << "Prepare recording time " 694 << (time_stat_.start_recording_time - time_stat_.prepare_recording_time) / 1e6 695 << " ms, recording time " 696 << (time_stat_.stop_recording_time - time_stat_.start_recording_time) / 1e6 697 << " ms, stop recording time " 698 << (time_stat_.finish_recording_time - time_stat_.stop_recording_time) / 1e6 699 << " ms, post process time " 700 << (time_stat_.post_process_time - time_stat_.finish_recording_time) / 1e6 << " ms."; 701 return true; 702 } 703 704 bool RecordCommand::ParseOptions(const std::vector<std::string>& args, 705 std::vector<std::string>* non_option_args) { 706 std::vector<size_t> wait_setting_speed_event_groups_; 707 size_t i; 708 for (i = 0; i < args.size() && !args[i].empty() && args[i][0] == '-'; ++i) { 709 if (args[i] == "-a") { 710 system_wide_collection_ = true; 711 } else if (args[i] == "--app") { 712 if (!NextArgumentOrError(args, &i)) { 713 return false; 714 } 715 app_package_name_ = args[i]; 716 } else if (args[i] == "--aux-buffer-size") { 717 if (!GetUintOption(args, &i, &aux_buffer_size_, 0, std::numeric_limits<size_t>::max(), 718 true)) { 719 return false; 720 } 721 if (!IsPowerOfTwo(aux_buffer_size_) || aux_buffer_size_ % sysconf(_SC_PAGE_SIZE)) { 722 LOG(ERROR) << "invalid aux buffer size: " << args[i]; 723 return false; 724 } 725 } else if (args[i] == "-b") { 726 branch_sampling_ = branch_sampling_type_map["any"]; 727 } else if (args[i] == "-c" || args[i] == "-f") { 728 uint64_t value; 729 if (!GetUintOption(args, &i, &value, 1)) { 730 return false; 731 } 732 if (args[i-1] == "-c") { 733 sample_speed_.reset(new SampleSpeed(0, value)); 734 } else { 735 if (value >= INT_MAX) { 736 LOG(ERROR) << "sample freq can't be bigger than INT_MAX."; 737 return false; 738 } 739 sample_speed_.reset(new SampleSpeed(value, 0)); 740 max_sample_freq_ = std::max(max_sample_freq_, value); 741 } 742 for (auto group_id : wait_setting_speed_event_groups_) { 743 event_selection_set_.SetSampleSpeed(group_id, *sample_speed_); 744 } 745 wait_setting_speed_event_groups_.clear(); 746 747 } else if (args[i] == "--call-graph") { 748 if (!NextArgumentOrError(args, &i)) { 749 return false; 750 } 751 std::vector<std::string> strs = android::base::Split(args[i], ","); 752 if (strs[0] == "fp") { 753 fp_callchain_sampling_ = true; 754 dwarf_callchain_sampling_ = false; 755 } else if (strs[0] == "dwarf") { 756 fp_callchain_sampling_ = false; 757 dwarf_callchain_sampling_ = true; 758 if (strs.size() > 1) { 759 uint64_t size; 760 if (!android::base::ParseUint(strs[1], &size)) { 761 LOG(ERROR) << "invalid dump stack size in --call-graph option: " << strs[1]; 762 return false; 763 } 764 if ((size & 7) != 0) { 765 LOG(ERROR) << "dump stack size " << size 766 << " is not 8-byte aligned."; 767 return false; 768 } 769 if (size >= MAX_DUMP_STACK_SIZE) { 770 LOG(ERROR) << "dump stack size " << size 771 << " is bigger than max allowed size " 772 << MAX_DUMP_STACK_SIZE << "."; 773 return false; 774 } 775 dump_stack_size_in_dwarf_sampling_ = static_cast<uint32_t>(size); 776 } 777 } else { 778 LOG(ERROR) << "unexpected argument for --call-graph option: " 779 << args[i]; 780 return false; 781 } 782 } else if (args[i] == "--clockid") { 783 if (!NextArgumentOrError(args, &i)) { 784 return false; 785 } 786 if (args[i] != "perf") { 787 if (!IsSettingClockIdSupported()) { 788 LOG(ERROR) << "Setting clockid is not supported by the kernel."; 789 return false; 790 } 791 if (clockid_map.find(args[i]) == clockid_map.end()) { 792 LOG(ERROR) << "Invalid clockid: " << args[i]; 793 return false; 794 } 795 } 796 clockid_ = args[i]; 797 } else if (args[i] == "--cpu") { 798 if (!NextArgumentOrError(args, &i)) { 799 return false; 800 } 801 cpus_ = GetCpusFromString(args[i]); 802 } else if (args[i] == "--cpu-percent") { 803 if (!GetUintOption(args, &i, &cpu_time_max_percent_, 1, 100)) { 804 return false; 805 } 806 } else if (args[i] == "--duration") { 807 if (!GetDoubleOption(args, &i, &duration_in_sec_, 1e-9)) { 808 return false; 809 } 810 } else if (args[i] == "-e") { 811 if (!NextArgumentOrError(args, &i)) { 812 return false; 813 } 814 std::vector<std::string> event_types = android::base::Split(args[i], ","); 815 for (auto& event_type : event_types) { 816 size_t group_id; 817 if (!event_selection_set_.AddEventType(event_type, &group_id)) { 818 return false; 819 } 820 if (sample_speed_) { 821 event_selection_set_.SetSampleSpeed(group_id, *sample_speed_); 822 } else { 823 wait_setting_speed_event_groups_.push_back(group_id); 824 } 825 } 826 } else if (args[i] == "--exclude-perf") { 827 exclude_perf_ = true; 828 } else if (args[i] == "--exit-with-parent") { 829 prctl(PR_SET_PDEATHSIG, SIGHUP, 0, 0, 0); 830 } else if (args[i] == "-g") { 831 fp_callchain_sampling_ = false; 832 dwarf_callchain_sampling_ = true; 833 } else if (args[i] == "--group") { 834 if (!NextArgumentOrError(args, &i)) { 835 return false; 836 } 837 std::vector<std::string> event_types = android::base::Split(args[i], ","); 838 size_t group_id; 839 if (!event_selection_set_.AddEventGroup(event_types, &group_id)) { 840 return false; 841 } 842 if (sample_speed_) { 843 event_selection_set_.SetSampleSpeed(group_id, *sample_speed_); 844 } else { 845 wait_setting_speed_event_groups_.push_back(group_id); 846 } 847 } else if (args[i] == "--in-app") { 848 in_app_context_ = true; 849 } else if (args[i] == "--include-filter") { 850 if (!NextArgumentOrError(args, &i)) { 851 return false; 852 } 853 event_selection_set_.SetIncludeFilters(android::base::Split(args[i], ",")); 854 } else if (args[i] == "-j") { 855 if (!NextArgumentOrError(args, &i)) { 856 return false; 857 } 858 std::vector<std::string> branch_sampling_types = 859 android::base::Split(args[i], ","); 860 for (auto& type : branch_sampling_types) { 861 auto it = branch_sampling_type_map.find(type); 862 if (it == branch_sampling_type_map.end()) { 863 LOG(ERROR) << "unrecognized branch sampling filter: " << type; 864 return false; 865 } 866 branch_sampling_ |= it->second; 867 } 868 } else if (args[i] == "-m") { 869 uint64_t pages; 870 if (!GetUintOption(args, &i, &pages)) { 871 return false; 872 } 873 if (!IsPowerOfTwo(pages)) { 874 LOG(ERROR) << "Invalid mmap_pages: '" << args[i] << "'"; 875 return false; 876 } 877 mmap_page_range_.first = mmap_page_range_.second = pages; 878 } else if (args[i] == "--no-dump-kernel-symbols") { 879 can_dump_kernel_symbols_ = false; 880 } else if (args[i] == "--no-dump-symbols") { 881 dump_symbols_ = false; 882 } else if (args[i] == "--no-inherit") { 883 child_inherit_ = false; 884 } else if (args[i] == "--no-unwind") { 885 unwind_dwarf_callchain_ = false; 886 } else if (args[i] == "--no-callchain-joiner") { 887 allow_callchain_joiner_ = false; 888 } else if (args[i] == "--callchain-joiner-min-matching-nodes") { 889 if (!GetUintOption(args, &i, &callchain_joiner_min_matching_nodes_, 1)) { 890 return false; 891 } 892 } else if (args[i] == "--no-cut-samples") { 893 allow_cutting_samples_ = false; 894 } else if (args[i] == "-o") { 895 if (!NextArgumentOrError(args, &i)) { 896 return false; 897 } 898 record_filename_ = args[i]; 899 } else if (args[i] == "--out-fd") { 900 int fd; 901 if (!GetUintOption(args, &i, &fd)) { 902 return false; 903 } 904 out_fd_.reset(fd); 905 } else if (args[i] == "-p") { 906 if (!NextArgumentOrError(args, &i)) { 907 return false; 908 } 909 std::set<pid_t> pids; 910 if (!GetValidThreadsFromThreadString(args[i], &pids)) { 911 return false; 912 } 913 event_selection_set_.AddMonitoredProcesses(pids); 914 } else if (android::base::StartsWith(args[i], "--post-unwind")) { 915 if (args[i] == "--post-unwind" || args[i] == "--post-unwind=yes") { 916 post_unwind_ = true; 917 } else if (args[i] == "--post-unwind=no") { 918 post_unwind_ = false; 919 } else { 920 LOG(ERROR) << "unexpected option " << args[i]; 921 return false; 922 } 923 } else if (args[i] == "--size-limit") { 924 if (!GetUintOption(args, &i, &size_limit_in_bytes_, 1, std::numeric_limits<uint64_t>::max(), 925 true)) { 926 return false; 927 } 928 } else if (args[i] == "--start_profiling_fd") { 929 int fd; 930 if (!GetUintOption(args, &i, &fd)) { 931 return false; 932 } 933 start_profiling_fd_.reset(fd); 934 } else if (args[i] == "--stdio-controls-profiling") { 935 stdio_controls_profiling_ = true; 936 } else if (args[i] == "--stop-signal-fd") { 937 int fd; 938 if (!GetUintOption(args, &i, &fd)) { 939 return false; 940 } 941 stop_signal_fd_.reset(fd); 942 } else if (args[i] == "--symfs") { 943 if (!NextArgumentOrError(args, &i)) { 944 return false; 945 } 946 if (!Dso::SetSymFsDir(args[i])) { 947 return false; 948 } 949 } else if (args[i] == "-t") { 950 if (!NextArgumentOrError(args, &i)) { 951 return false; 952 } 953 std::set<pid_t> tids; 954 if (!GetValidThreadsFromThreadString(args[i], &tids)) { 955 return false; 956 } 957 event_selection_set_.AddMonitoredThreads(tids); 958 } else if (args[i] == "--trace-offcpu") { 959 trace_offcpu_ = true; 960 } else if (args[i] == "--tracepoint-events") { 961 if (!NextArgumentOrError(args, &i)) { 962 return false; 963 } 964 if (!SetTracepointEventsFilePath(args[i])) { 965 return false; 966 } 967 } else if (args[i] == "--") { 968 i++; 969 break; 970 } else { 971 ReportUnknownOption(args, i); 972 return false; 973 } 974 } 975 976 if (!dwarf_callchain_sampling_) { 977 if (!unwind_dwarf_callchain_) { 978 LOG(ERROR) 979 << "--no-unwind is only used with `--call-graph dwarf` option."; 980 return false; 981 } 982 unwind_dwarf_callchain_ = false; 983 } 984 if (post_unwind_) { 985 if (!dwarf_callchain_sampling_ || !unwind_dwarf_callchain_) { 986 post_unwind_ = false; 987 } 988 } 989 990 if (fp_callchain_sampling_) { 991 if (GetBuildArch() == ARCH_ARM) { 992 LOG(WARNING) << "`--callgraph fp` option doesn't work well on arm architecture, " 993 << "consider using `-g` option or profiling on aarch64 architecture."; 994 } 995 } 996 997 if (system_wide_collection_ && event_selection_set_.HasMonitoredTarget()) { 998 LOG(ERROR) << "Record system wide and existing processes/threads can't be " 999 "used at the same time."; 1000 return false; 1001 } 1002 1003 if (system_wide_collection_ && !IsRoot()) { 1004 LOG(ERROR) << "System wide profiling needs root privilege."; 1005 return false; 1006 } 1007 1008 if (dump_symbols_ && can_dump_kernel_symbols_) { 1009 // No need to dump kernel symbols as we will dump all required symbols. 1010 can_dump_kernel_symbols_ = false; 1011 } 1012 if (clockid_.empty()) { 1013 clockid_ = IsSettingClockIdSupported() ? "monotonic" : "perf"; 1014 } 1015 1016 non_option_args->clear(); 1017 for (; i < args.size(); ++i) { 1018 non_option_args->push_back(args[i]); 1019 } 1020 return true; 1021 } 1022 1023 bool RecordCommand::AdjustPerfEventLimit() { 1024 bool set_prop = false; 1025 // 1. Adjust max_sample_rate. 1026 uint64_t cur_max_freq; 1027 if (GetMaxSampleFrequency(&cur_max_freq) && cur_max_freq < max_sample_freq_ && 1028 !SetMaxSampleFrequency(max_sample_freq_)) { 1029 set_prop = true; 1030 } 1031 // 2. Adjust perf_cpu_time_max_percent. 1032 size_t cur_percent; 1033 if (GetCpuTimeMaxPercent(&cur_percent) && cur_percent != cpu_time_max_percent_ && 1034 !SetCpuTimeMaxPercent(cpu_time_max_percent_)) { 1035 set_prop = true; 1036 } 1037 // 3. Adjust perf_event_mlock_kb. 1038 long cpus = sysconf(_SC_NPROCESSORS_CONF); 1039 uint64_t mlock_kb = cpus * (mmap_page_range_.second + 1) * 4; 1040 if (event_selection_set_.HasAuxTrace()) { 1041 mlock_kb += cpus * aux_buffer_size_ / 1024; 1042 } 1043 uint64_t cur_mlock_kb; 1044 if (GetPerfEventMlockKb(&cur_mlock_kb) && cur_mlock_kb < mlock_kb && 1045 !SetPerfEventMlockKb(mlock_kb)) { 1046 set_prop = true; 1047 } 1048 1049 if (GetAndroidVersion() >= kAndroidVersionP + 1 && set_prop && !in_app_context_) { 1050 return SetPerfEventLimits(std::max(max_sample_freq_, cur_max_freq), cpu_time_max_percent_, 1051 std::max(mlock_kb, cur_mlock_kb)); 1052 } 1053 return true; 1054 } 1055 1056 bool RecordCommand::TraceOffCpu() { 1057 if (FindEventTypeByName("sched:sched_switch") == nullptr) { 1058 LOG(ERROR) << "Can't trace off cpu because sched:sched_switch event is not available"; 1059 return false; 1060 } 1061 for (auto& event_type : event_selection_set_.GetTracepointEvents()) { 1062 if (event_type->name == "sched:sched_switch") { 1063 LOG(ERROR) << "Trace offcpu can't be used together with sched:sched_switch event"; 1064 return false; 1065 } 1066 } 1067 if (!IsDumpingRegsForTracepointEventsSupported()) { 1068 LOG(ERROR) << "Dumping regs for tracepoint events is not supported by the kernel"; 1069 return false; 1070 } 1071 return event_selection_set_.AddEventType("sched:sched_switch"); 1072 } 1073 1074 bool RecordCommand::SetEventSelectionFlags() { 1075 event_selection_set_.SampleIdAll(); 1076 if (!event_selection_set_.SetBranchSampling(branch_sampling_)) { 1077 return false; 1078 } 1079 if (fp_callchain_sampling_) { 1080 event_selection_set_.EnableFpCallChainSampling(); 1081 } else if (dwarf_callchain_sampling_) { 1082 if (!event_selection_set_.EnableDwarfCallChainSampling( 1083 dump_stack_size_in_dwarf_sampling_)) { 1084 return false; 1085 } 1086 } 1087 event_selection_set_.SetInherit(child_inherit_); 1088 if (clockid_ != "perf") { 1089 event_selection_set_.SetClockId(clockid_map[clockid_]); 1090 } 1091 return true; 1092 } 1093 1094 bool RecordCommand::CreateAndInitRecordFile() { 1095 record_file_writer_ = CreateRecordFile(record_filename_); 1096 if (record_file_writer_ == nullptr) { 1097 return false; 1098 } 1099 // Use first perf_event_attr and first event id to dump mmap and comm records. 1100 dumping_attr_id_ = event_selection_set_.GetEventAttrWithId()[0]; 1101 return DumpKernelSymbol() && DumpTracingData() && DumpKernelMaps() && DumpUserSpaceMaps() && 1102 DumpAuxTraceInfo(); 1103 } 1104 1105 std::unique_ptr<RecordFileWriter> RecordCommand::CreateRecordFile( 1106 const std::string& filename) { 1107 std::unique_ptr<RecordFileWriter> writer = 1108 RecordFileWriter::CreateInstance(filename); 1109 if (writer == nullptr) { 1110 return nullptr; 1111 } 1112 1113 if (!writer->WriteAttrSection(event_selection_set_.GetEventAttrWithId())) { 1114 return nullptr; 1115 } 1116 return writer; 1117 } 1118 1119 bool RecordCommand::DumpKernelSymbol() { 1120 if (can_dump_kernel_symbols_) { 1121 std::string kallsyms; 1122 if (event_selection_set_.NeedKernelSymbol() && 1123 CheckKernelSymbolAddresses()) { 1124 if (!android::base::ReadFileToString("/proc/kallsyms", &kallsyms)) { 1125 PLOG(ERROR) << "failed to read /proc/kallsyms"; 1126 return false; 1127 } 1128 KernelSymbolRecord r(kallsyms); 1129 if (!ProcessRecord(&r)) { 1130 return false; 1131 } 1132 } 1133 } 1134 return true; 1135 } 1136 1137 bool RecordCommand::DumpTracingData() { 1138 std::vector<const EventType*> tracepoint_event_types = 1139 event_selection_set_.GetTracepointEvents(); 1140 if (tracepoint_event_types.empty() || !CanRecordRawData() || in_app_context_) { 1141 return true; // No need to dump tracing data, or can't do it. 1142 } 1143 std::vector<char> tracing_data; 1144 if (!GetTracingData(tracepoint_event_types, &tracing_data)) { 1145 return false; 1146 } 1147 TracingDataRecord record(tracing_data); 1148 if (!ProcessRecord(&record)) { 1149 return false; 1150 } 1151 return true; 1152 } 1153 1154 bool RecordCommand::DumpKernelMaps() { 1155 KernelMmap kernel_mmap; 1156 std::vector<KernelMmap> module_mmaps; 1157 GetKernelAndModuleMmaps(&kernel_mmap, &module_mmaps); 1158 1159 MmapRecord mmap_record(*dumping_attr_id_.attr, true, UINT_MAX, 0, kernel_mmap.start_addr, 1160 kernel_mmap.len, 0, kernel_mmap.filepath, dumping_attr_id_.ids[0]); 1161 if (!ProcessRecord(&mmap_record)) { 1162 return false; 1163 } 1164 for (auto& module_mmap : module_mmaps) { 1165 MmapRecord mmap_record(*dumping_attr_id_.attr, true, UINT_MAX, 0, module_mmap.start_addr, 1166 module_mmap.len, 0, module_mmap.filepath, dumping_attr_id_.ids[0]); 1167 if (!ProcessRecord(&mmap_record)) { 1168 return false; 1169 } 1170 } 1171 return true; 1172 } 1173 1174 bool RecordCommand::DumpUserSpaceMaps() { 1175 // For system_wide profiling: 1176 // If no aux tracing, maps of a process is dumped when needed (first time a sample hits 1177 // that process). 1178 // If aux tracing, we don't know which maps will be needed, so dump all process maps. 1179 if (system_wide_collection_ && !event_selection_set_.HasAuxTrace()) { 1180 return true; 1181 } 1182 // Map from process id to a set of thread ids in that process. 1183 std::unordered_map<pid_t, std::unordered_set<pid_t>> process_map; 1184 if (system_wide_collection_) { 1185 for (auto pid : GetAllProcesses()) { 1186 process_map[pid] = std::unordered_set<pid_t>(); 1187 } 1188 } else { 1189 for (pid_t pid : event_selection_set_.GetMonitoredProcesses()) { 1190 std::vector<pid_t> tids = GetThreadsInProcess(pid); 1191 process_map[pid].insert(tids.begin(), tids.end()); 1192 } 1193 for (pid_t tid : event_selection_set_.GetMonitoredThreads()) { 1194 pid_t pid; 1195 if (GetProcessForThread(tid, &pid)) { 1196 process_map[pid].insert(tid); 1197 } 1198 } 1199 } 1200 1201 // Dump each process. 1202 for (auto& pair : process_map) { 1203 if (!DumpProcessMaps(pair.first, pair.second)) { 1204 return false; 1205 } 1206 } 1207 return true; 1208 } 1209 1210 bool RecordCommand::DumpProcessMaps(pid_t pid, const std::unordered_set<pid_t>& tids) { 1211 // Dump mmap records. 1212 std::vector<ThreadMmap> thread_mmaps; 1213 if (!GetThreadMmapsInProcess(pid, &thread_mmaps)) { 1214 // The process may exit before we get its info. 1215 return true; 1216 } 1217 const perf_event_attr& attr = *dumping_attr_id_.attr; 1218 uint64_t event_id = dumping_attr_id_.ids[0]; 1219 for (const auto& map : thread_mmaps) { 1220 if (!(map.prot & PROT_EXEC) && !event_selection_set_.RecordNotExecutableMaps()) { 1221 continue; 1222 } 1223 Mmap2Record record(attr, false, pid, pid, map.start_addr, map.len, 1224 map.pgoff, map.prot, map.name, event_id, last_record_timestamp_); 1225 if (!ProcessRecord(&record)) { 1226 return false; 1227 } 1228 } 1229 // Dump process name. 1230 std::string name = GetCompleteProcessName(pid); 1231 if (!name.empty()) { 1232 CommRecord record(attr, pid, pid, name, event_id, last_record_timestamp_); 1233 if (!ProcessRecord(&record)) { 1234 return false; 1235 } 1236 } 1237 // Dump thread info. 1238 for (const auto& tid : tids) { 1239 if (tid != pid && GetThreadName(tid, &name)) { 1240 CommRecord comm_record(attr, pid, tid, name, event_id, last_record_timestamp_); 1241 if (!ProcessRecord(&comm_record)) { 1242 return false; 1243 } 1244 } 1245 } 1246 return true; 1247 } 1248 1249 bool RecordCommand::ProcessRecord(Record* record) { 1250 UpdateRecord(record); 1251 if (ShouldOmitRecord(record)) { 1252 return true; 1253 } 1254 if (size_limit_in_bytes_ > 0u) { 1255 if (size_limit_in_bytes_ < record_file_writer_->GetDataSectionSize()) { 1256 return event_selection_set_.GetIOEventLoop()->ExitLoop(); 1257 } 1258 } 1259 if (jit_debug_reader_ && !jit_debug_reader_->UpdateRecord(record)) { 1260 return false; 1261 } 1262 last_record_timestamp_ = std::max(last_record_timestamp_, record->Timestamp()); 1263 // In system wide recording, maps are dumped when they are needed by records. 1264 if (system_wide_collection_ && !DumpMapsForRecord(record)) { 1265 return false; 1266 } 1267 if (unwind_dwarf_callchain_) { 1268 if (post_unwind_) { 1269 return SaveRecordForPostUnwinding(record); 1270 } 1271 return SaveRecordAfterUnwinding(record); 1272 } 1273 return SaveRecordWithoutUnwinding(record); 1274 } 1275 1276 bool RecordCommand::DumpAuxTraceInfo() { 1277 if (event_selection_set_.HasAuxTrace()) { 1278 AuxTraceInfoRecord auxtrace_info = ETMRecorder::GetInstance().CreateAuxTraceInfoRecord(); 1279 return ProcessRecord(&auxtrace_info); 1280 } 1281 return true; 1282 } 1283 1284 template <typename MmapRecordType> 1285 bool MapOnlyExistInMemory(MmapRecordType* record) { 1286 return !record->InKernel() && MappedFileOnlyExistInMemory(record->filename); 1287 } 1288 1289 bool RecordCommand::ShouldOmitRecord(Record* record) { 1290 if (jit_debug_reader_) { 1291 // To profile jitted Java code, we need PROT_JIT_SYMFILE_MAP maps not overlapped by maps for 1292 // [anon:dalvik-jit-code-cache]. To profile interpreted Java code, we record maps that 1293 // are not executable. Some non-exec maps (like those for stack, heap) provide misleading map 1294 // entries for unwinding, as in http://b/77236599. So it is better to remove 1295 // dalvik-jit-code-cache and other maps that only exist in memory. 1296 switch (record->type()) { 1297 case PERF_RECORD_MMAP: 1298 return MapOnlyExistInMemory(static_cast<MmapRecord*>(record)); 1299 case PERF_RECORD_MMAP2: 1300 return MapOnlyExistInMemory(static_cast<Mmap2Record*>(record)); 1301 } 1302 } 1303 return false; 1304 } 1305 1306 bool RecordCommand::DumpMapsForRecord(Record* record) { 1307 if (record->type() == PERF_RECORD_SAMPLE) { 1308 pid_t pid = static_cast<SampleRecord*>(record)->tid_data.pid; 1309 if (dumped_processes_.find(pid) == dumped_processes_.end()) { 1310 // Dump map info and all thread names for that process. 1311 std::vector<pid_t> tids = GetThreadsInProcess(pid); 1312 if (!tids.empty() && 1313 !DumpProcessMaps(pid, std::unordered_set<pid_t>(tids.begin(), tids.end()))) { 1314 return false; 1315 } 1316 dumped_processes_.insert(pid); 1317 } 1318 } 1319 return true; 1320 } 1321 1322 bool RecordCommand::SaveRecordForPostUnwinding(Record* record) { 1323 if (!record_file_writer_->WriteRecord(*record)) { 1324 LOG(ERROR) << "If there isn't enough space for storing profiling data, consider using " 1325 << "--no-post-unwind option."; 1326 return false; 1327 } 1328 return true; 1329 } 1330 1331 bool RecordCommand::SaveRecordAfterUnwinding(Record* record) { 1332 if (record->type() == PERF_RECORD_SAMPLE) { 1333 auto& r = *static_cast<SampleRecord*>(record); 1334 // AdjustCallChainGeneratedByKernel() should go before UnwindRecord(). Because we don't want 1335 // to adjust callchains generated by dwarf unwinder. 1336 r.AdjustCallChainGeneratedByKernel(); 1337 if (!UnwindRecord(r)) { 1338 return false; 1339 } 1340 // ExcludeKernelCallChain() should go after UnwindRecord() to notice the generated user call 1341 // chain. 1342 if (r.InKernel() && exclude_kernel_callchain_ && !r.ExcludeKernelCallChain()) { 1343 // If current record contains no user callchain, skip it. 1344 return true; 1345 } 1346 sample_record_count_++; 1347 } else if (record->type() == PERF_RECORD_LOST) { 1348 lost_record_count_ += static_cast<LostRecord*>(record)->lost; 1349 } else { 1350 thread_tree_.Update(*record); 1351 } 1352 return record_file_writer_->WriteRecord(*record); 1353 } 1354 1355 bool RecordCommand::SaveRecordWithoutUnwinding(Record* record) { 1356 if (record->type() == PERF_RECORD_SAMPLE) { 1357 auto& r = *static_cast<SampleRecord*>(record); 1358 if (fp_callchain_sampling_ || dwarf_callchain_sampling_) { 1359 r.AdjustCallChainGeneratedByKernel(); 1360 } 1361 if (r.InKernel() && exclude_kernel_callchain_ && !r.ExcludeKernelCallChain()) { 1362 // If current record contains no user callchain, skip it. 1363 return true; 1364 } 1365 sample_record_count_++; 1366 } else if (record->type() == PERF_RECORD_LOST) { 1367 lost_record_count_ += static_cast<LostRecord*>(record)->lost; 1368 } 1369 return record_file_writer_->WriteRecord(*record); 1370 } 1371 1372 bool RecordCommand::ProcessJITDebugInfo(const std::vector<JITDebugInfo>& debug_info, 1373 bool sync_kernel_records) { 1374 EventAttrWithId attr_id = event_selection_set_.GetEventAttrWithId()[0]; 1375 for (auto& info : debug_info) { 1376 if (info.type == JITDebugInfo::JIT_DEBUG_JIT_CODE) { 1377 uint64_t timestamp = jit_debug_reader_->SyncWithRecords() ? info.timestamp 1378 : last_record_timestamp_; 1379 Mmap2Record record(*attr_id.attr, false, info.pid, info.pid, 1380 info.jit_code_addr, info.jit_code_len, 0, map_flags::PROT_JIT_SYMFILE_MAP, 1381 info.file_path, attr_id.ids[0], timestamp); 1382 if (!ProcessRecord(&record)) { 1383 return false; 1384 } 1385 } else { 1386 if (info.extracted_dex_file_map) { 1387 ThreadMmap& map = *info.extracted_dex_file_map; 1388 uint64_t timestamp = jit_debug_reader_->SyncWithRecords() ? info.timestamp 1389 : last_record_timestamp_; 1390 Mmap2Record record(*attr_id.attr, false, info.pid, info.pid, map.start_addr, map.len, 1391 map.pgoff, map.prot, map.name, attr_id.ids[0], timestamp); 1392 if (!ProcessRecord(&record)) { 1393 return false; 1394 } 1395 } 1396 thread_tree_.AddDexFileOffset(info.file_path, info.dex_file_offset); 1397 } 1398 } 1399 // We want to let samples see the most recent JIT maps generated before them, but no JIT maps 1400 // generated after them. So process existing samples each time generating new JIT maps. We prefer 1401 // to process samples after processing JIT maps. Because some of the samples may hit the new JIT 1402 // maps, and we want to report them properly. 1403 if (sync_kernel_records && !event_selection_set_.SyncKernelBuffer()) { 1404 return false; 1405 } 1406 return true; 1407 } 1408 1409 bool RecordCommand::ProcessControlCmd(IOEventLoop* loop) { 1410 char* line = nullptr; 1411 size_t line_length = 0; 1412 if (getline(&line, &line_length, stdin) == -1) { 1413 free(line); 1414 // When the simpleperf Java API destroys the simpleperf process, it also closes the stdin pipe. 1415 // So we may see EOF of stdin. 1416 return loop->ExitLoop(); 1417 } 1418 std::string cmd = android::base::Trim(line); 1419 free(line); 1420 LOG(DEBUG) << "process control cmd: " << cmd; 1421 bool result = false; 1422 if (cmd == "pause") { 1423 result = event_selection_set_.SetEnableEvents(false); 1424 } else if (cmd == "resume") { 1425 result = event_selection_set_.SetEnableEvents(true); 1426 } else { 1427 LOG(ERROR) << "unknown control cmd: " << cmd; 1428 } 1429 printf("%s\n", result ? "ok" : "error"); 1430 fflush(stdout); 1431 return result; 1432 } 1433 1434 template <class RecordType> 1435 void UpdateMmapRecordForEmbeddedPath(RecordType& r, bool has_prot, uint32_t prot) { 1436 if (r.InKernel()) { 1437 return; 1438 } 1439 std::string filename = r.filename; 1440 bool name_changed = false; 1441 // Some vdex files in map files are marked with deleted flag, but they exist in the file system. 1442 // It may be because a new file is used to replace the old one, but still worth to try. 1443 if (android::base::EndsWith(filename, " (deleted)")) { 1444 filename.resize(filename.size() - 10); 1445 name_changed = true; 1446 } 1447 if (r.data->pgoff != 0 && (!has_prot || (prot & PROT_EXEC))) { 1448 // For the case of a shared library "foobar.so" embedded 1449 // inside an APK, we rewrite the original MMAP from 1450 // ["path.apk" offset=X] to ["path.apk!/foobar.so" offset=W] 1451 // so as to make the library name explicit. This update is 1452 // done here (as part of the record operation) as opposed to 1453 // on the host during the report, since we want to report 1454 // the correct library name even if the the APK in question 1455 // is not present on the host. The new offset W is 1456 // calculated to be with respect to the start of foobar.so, 1457 // not to the start of path.apk. 1458 EmbeddedElf* ee = ApkInspector::FindElfInApkByOffset(filename, r.data->pgoff); 1459 if (ee != nullptr) { 1460 // Compute new offset relative to start of elf in APK. 1461 auto data = *r.data; 1462 data.pgoff -= ee->entry_offset(); 1463 r.SetDataAndFilename(data, GetUrlInApk(filename, ee->entry_name())); 1464 return; 1465 } 1466 } 1467 std::string zip_path; 1468 std::string entry_name; 1469 if (ParseExtractedInMemoryPath(filename, &zip_path, &entry_name)) { 1470 filename = GetUrlInApk(zip_path, entry_name); 1471 name_changed = true; 1472 } 1473 if (name_changed) { 1474 auto data = *r.data; 1475 r.SetDataAndFilename(data, filename); 1476 } 1477 } 1478 1479 void RecordCommand::UpdateRecord(Record* record) { 1480 if (record->type() == PERF_RECORD_MMAP) { 1481 UpdateMmapRecordForEmbeddedPath(*static_cast<MmapRecord*>(record), false, 0); 1482 } else if (record->type() == PERF_RECORD_MMAP2) { 1483 auto r = static_cast<Mmap2Record*>(record); 1484 UpdateMmapRecordForEmbeddedPath(*r, true, r->data->prot); 1485 } else if (record->type() == PERF_RECORD_COMM) { 1486 auto r = static_cast<CommRecord*>(record); 1487 if (r->data->pid == r->data->tid) { 1488 std::string s = GetCompleteProcessName(r->data->pid); 1489 if (!s.empty()) { 1490 r->SetCommandName(s); 1491 } 1492 } 1493 } 1494 } 1495 1496 bool RecordCommand::UnwindRecord(SampleRecord& r) { 1497 if ((r.sample_type & PERF_SAMPLE_CALLCHAIN) && 1498 (r.sample_type & PERF_SAMPLE_REGS_USER) && 1499 (r.regs_user_data.reg_mask != 0) && 1500 (r.sample_type & PERF_SAMPLE_STACK_USER) && 1501 (r.GetValidStackSize() > 0)) { 1502 ThreadEntry* thread = 1503 thread_tree_.FindThreadOrNew(r.tid_data.pid, r.tid_data.tid); 1504 RegSet regs(r.regs_user_data.abi, r.regs_user_data.reg_mask, r.regs_user_data.regs); 1505 std::vector<uint64_t> ips; 1506 std::vector<uint64_t> sps; 1507 if (!offline_unwinder_->UnwindCallChain(*thread, regs, r.stack_user_data.data, 1508 r.GetValidStackSize(), &ips, &sps)) { 1509 return false; 1510 } 1511 // The unwinding may fail if JIT debug info isn't the latest. In this case, read JIT debug info 1512 // from the process and retry unwinding. 1513 if (jit_debug_reader_ && !post_unwind_ && 1514 offline_unwinder_->IsCallChainBrokenForIncompleteJITDebugInfo()) { 1515 jit_debug_reader_->ReadProcess(r.tid_data.pid); 1516 jit_debug_reader_->FlushDebugInfo(r.Timestamp()); 1517 if (!offline_unwinder_->UnwindCallChain(*thread, regs, r.stack_user_data.data, 1518 r.GetValidStackSize(), &ips, &sps)) { 1519 return false; 1520 } 1521 } 1522 r.ReplaceRegAndStackWithCallChain(ips); 1523 if (callchain_joiner_) { 1524 return callchain_joiner_->AddCallChain(r.tid_data.pid, r.tid_data.tid, 1525 CallChainJoiner::ORIGINAL_OFFLINE, ips, sps); 1526 } 1527 } 1528 return true; 1529 } 1530 1531 bool RecordCommand::PostUnwindRecords() { 1532 // 1. Move records from record_filename_ to a temporary file. 1533 if (!record_file_writer_->Close()) { 1534 return false; 1535 } 1536 record_file_writer_.reset(); 1537 std::unique_ptr<TemporaryFile> tmp_file = ScopedTempFiles::CreateTempFile(); 1538 if (!Workload::RunCmd({"mv", record_filename_, tmp_file->path})) { 1539 return false; 1540 } 1541 std::unique_ptr<RecordFileReader> reader = RecordFileReader::CreateInstance(tmp_file->path); 1542 if (!reader) { 1543 return false; 1544 } 1545 1546 // 2. Read records from the temporary file, and write unwound records back to record_filename_. 1547 record_file_writer_ = CreateRecordFile(record_filename_); 1548 if (!record_file_writer_) { 1549 return false; 1550 } 1551 sample_record_count_ = 0; 1552 lost_record_count_ = 0; 1553 auto callback = [this](std::unique_ptr<Record> record) { 1554 return SaveRecordAfterUnwinding(record.get()); 1555 }; 1556 return reader->ReadDataSection(callback); 1557 } 1558 1559 bool RecordCommand::JoinCallChains() { 1560 // 1. Prepare joined callchains. 1561 if (!callchain_joiner_->JoinCallChains()) { 1562 return false; 1563 } 1564 // 2. Move records from record_filename_ to a temporary file. 1565 if (!record_file_writer_->Close()) { 1566 return false; 1567 } 1568 record_file_writer_.reset(); 1569 std::unique_ptr<TemporaryFile> tmp_file = ScopedTempFiles::CreateTempFile(); 1570 if (!Workload::RunCmd({"mv", record_filename_, tmp_file->path})) { 1571 return false; 1572 } 1573 1574 // 3. Read records from the temporary file, and write record with joined call chains back 1575 // to record_filename_. 1576 std::unique_ptr<RecordFileReader> reader = RecordFileReader::CreateInstance(tmp_file->path); 1577 record_file_writer_ = CreateRecordFile(record_filename_); 1578 if (!reader || !record_file_writer_) { 1579 return false; 1580 } 1581 1582 auto record_callback = [&](std::unique_ptr<Record> r) { 1583 if (r->type() != PERF_RECORD_SAMPLE) { 1584 return record_file_writer_->WriteRecord(*r); 1585 } 1586 SampleRecord& sr = *static_cast<SampleRecord*>(r.get()); 1587 if (!sr.HasUserCallChain()) { 1588 return record_file_writer_->WriteRecord(sr); 1589 } 1590 pid_t pid; 1591 pid_t tid; 1592 CallChainJoiner::ChainType type; 1593 std::vector<uint64_t> ips; 1594 std::vector<uint64_t> sps; 1595 if (!callchain_joiner_->GetNextCallChain(pid, tid, type, ips, sps)) { 1596 return false; 1597 } 1598 CHECK_EQ(type, CallChainJoiner::JOINED_OFFLINE); 1599 CHECK_EQ(pid, static_cast<pid_t>(sr.tid_data.pid)); 1600 CHECK_EQ(tid, static_cast<pid_t>(sr.tid_data.tid)); 1601 sr.UpdateUserCallChain(ips); 1602 return record_file_writer_->WriteRecord(sr); 1603 }; 1604 return reader->ReadDataSection(record_callback); 1605 } 1606 1607 bool RecordCommand::DumpAdditionalFeatures( 1608 const std::vector<std::string>& args) { 1609 // Read data section of perf.data to collect hit file information. 1610 thread_tree_.ClearThreadAndMap(); 1611 bool kernel_symbols_available = false; 1612 if (CheckKernelSymbolAddresses()) { 1613 Dso::ReadKernelSymbolsFromProc(); 1614 kernel_symbols_available = true; 1615 } 1616 std::vector<uint64_t> auxtrace_offset; 1617 auto callback = [&](const Record* r) { 1618 thread_tree_.Update(*r); 1619 if (r->type() == PERF_RECORD_SAMPLE) { 1620 CollectHitFileInfo(*reinterpret_cast<const SampleRecord*>(r)); 1621 } else if (r->type() == PERF_RECORD_AUXTRACE) { 1622 auto auxtrace = static_cast<const AuxTraceRecord*>(r); 1623 auxtrace_offset.emplace_back(auxtrace->location.file_offset - auxtrace->size()); 1624 } 1625 }; 1626 if (!record_file_writer_->ReadDataSection(callback)) { 1627 return false; 1628 } 1629 1630 size_t feature_count = 6; 1631 if (branch_sampling_) { 1632 feature_count++; 1633 } 1634 if (!auxtrace_offset.empty()) { 1635 feature_count++; 1636 } 1637 if (!record_file_writer_->BeginWriteFeatures(feature_count)) { 1638 return false; 1639 } 1640 if (!DumpBuildIdFeature()) { 1641 return false; 1642 } 1643 if (!DumpFileFeature()) { 1644 return false; 1645 } 1646 utsname uname_buf; 1647 if (TEMP_FAILURE_RETRY(uname(&uname_buf)) != 0) { 1648 PLOG(ERROR) << "uname() failed"; 1649 return false; 1650 } 1651 if (!record_file_writer_->WriteFeatureString(PerfFileFormat::FEAT_OSRELEASE, 1652 uname_buf.release)) { 1653 return false; 1654 } 1655 if (!record_file_writer_->WriteFeatureString(PerfFileFormat::FEAT_ARCH, 1656 uname_buf.machine)) { 1657 return false; 1658 } 1659 1660 std::string exec_path = android::base::GetExecutablePath(); 1661 if (exec_path.empty()) exec_path = "simpleperf"; 1662 std::vector<std::string> cmdline; 1663 cmdline.push_back(exec_path); 1664 cmdline.push_back("record"); 1665 cmdline.insert(cmdline.end(), args.begin(), args.end()); 1666 if (!record_file_writer_->WriteCmdlineFeature(cmdline)) { 1667 return false; 1668 } 1669 if (branch_sampling_ != 0 && 1670 !record_file_writer_->WriteBranchStackFeature()) { 1671 return false; 1672 } 1673 if (!DumpMetaInfoFeature(kernel_symbols_available)) { 1674 return false; 1675 } 1676 if (!auxtrace_offset.empty() && !record_file_writer_->WriteAuxTraceFeature(auxtrace_offset)) { 1677 return false; 1678 } 1679 1680 if (!record_file_writer_->EndWriteFeatures()) { 1681 return false; 1682 } 1683 return true; 1684 } 1685 1686 bool RecordCommand::DumpBuildIdFeature() { 1687 std::vector<BuildIdRecord> build_id_records; 1688 BuildId build_id; 1689 std::vector<Dso*> dso_v = thread_tree_.GetAllDsos(); 1690 for (Dso* dso : dso_v) { 1691 // For aux tracing, we don't know which binaries are traced. 1692 // So dump build ids for all binaries. 1693 if (!dso->HasDumpId() && !event_selection_set_.HasAuxTrace()) { 1694 continue; 1695 } 1696 if (dso->type() == DSO_KERNEL) { 1697 if (!GetKernelBuildId(&build_id)) { 1698 continue; 1699 } 1700 build_id_records.push_back( 1701 BuildIdRecord(true, UINT_MAX, build_id, dso->Path())); 1702 } else if (dso->type() == DSO_KERNEL_MODULE) { 1703 std::string path = dso->Path(); 1704 std::string module_name = basename(&path[0]); 1705 if (android::base::EndsWith(module_name, ".ko")) { 1706 module_name = module_name.substr(0, module_name.size() - 3); 1707 } 1708 if (!GetModuleBuildId(module_name, &build_id)) { 1709 LOG(DEBUG) << "can't read build_id for module " << module_name; 1710 continue; 1711 } 1712 build_id_records.push_back(BuildIdRecord(true, UINT_MAX, build_id, path)); 1713 } else if (dso->type() == DSO_ELF_FILE) { 1714 if (dso->Path() == DEFAULT_EXECNAME_FOR_THREAD_MMAP) { 1715 continue; 1716 } 1717 if (!GetBuildIdFromDsoPath(dso->Path(), &build_id)) { 1718 LOG(DEBUG) << "Can't read build_id from file " << dso->Path(); 1719 continue; 1720 } 1721 build_id_records.push_back( 1722 BuildIdRecord(false, UINT_MAX, build_id, dso->Path())); 1723 } 1724 } 1725 if (!record_file_writer_->WriteBuildIdFeature(build_id_records)) { 1726 return false; 1727 } 1728 return true; 1729 } 1730 1731 bool RecordCommand::DumpFileFeature() { 1732 std::vector<Dso*> dso_v = thread_tree_.GetAllDsos(); 1733 return record_file_writer_->WriteFileFeatures(thread_tree_.GetAllDsos()); 1734 } 1735 1736 bool RecordCommand::DumpMetaInfoFeature(bool kernel_symbols_available) { 1737 std::unordered_map<std::string, std::string> info_map; 1738 info_map["simpleperf_version"] = GetSimpleperfVersion(); 1739 info_map["system_wide_collection"] = system_wide_collection_ ? "true" : "false"; 1740 info_map["trace_offcpu"] = trace_offcpu_ ? "true" : "false"; 1741 // By storing event types information in perf.data, the readers of perf.data have the same 1742 // understanding of event types, even if they are on another machine. 1743 info_map["event_type_info"] = ScopedEventTypes::BuildString(event_selection_set_.GetEvents()); 1744 #if defined(__ANDROID__) 1745 info_map["product_props"] = android::base::StringPrintf("%s:%s:%s", 1746 android::base::GetProperty("ro.product.manufacturer", "").c_str(), 1747 android::base::GetProperty("ro.product.model", "").c_str(), 1748 android::base::GetProperty("ro.product.name", "").c_str()); 1749 info_map["android_version"] = android::base::GetProperty("ro.build.version.release", ""); 1750 if (!app_package_name_.empty()) { 1751 info_map["app_package_name"] = app_package_name_; 1752 } 1753 #endif 1754 info_map["clockid"] = clockid_; 1755 info_map["timestamp"] = std::to_string(time(nullptr)); 1756 info_map["kernel_symbols_available"] = kernel_symbols_available ? "true" : "false"; 1757 return record_file_writer_->WriteMetaInfoFeature(info_map); 1758 } 1759 1760 void RecordCommand::CollectHitFileInfo(const SampleRecord& r) { 1761 const ThreadEntry* thread = 1762 thread_tree_.FindThreadOrNew(r.tid_data.pid, r.tid_data.tid); 1763 const MapEntry* map = 1764 thread_tree_.FindMap(thread, r.ip_data.ip, r.InKernel()); 1765 Dso* dso = map->dso; 1766 const Symbol* symbol; 1767 if (dump_symbols_) { 1768 symbol = thread_tree_.FindSymbol(map, r.ip_data.ip, nullptr, &dso); 1769 if (!symbol->HasDumpId()) { 1770 dso->CreateSymbolDumpId(symbol); 1771 } 1772 } 1773 if (!dso->HasDumpId() && dso->type() != DSO_UNKNOWN_FILE) { 1774 dso->CreateDumpId(); 1775 } 1776 if (r.sample_type & PERF_SAMPLE_CALLCHAIN) { 1777 bool in_kernel = r.InKernel(); 1778 bool first_ip = true; 1779 for (uint64_t i = 0; i < r.callchain_data.ip_nr; ++i) { 1780 uint64_t ip = r.callchain_data.ips[i]; 1781 if (ip >= PERF_CONTEXT_MAX) { 1782 switch (ip) { 1783 case PERF_CONTEXT_KERNEL: 1784 in_kernel = true; 1785 break; 1786 case PERF_CONTEXT_USER: 1787 in_kernel = false; 1788 break; 1789 default: 1790 LOG(DEBUG) << "Unexpected perf_context in callchain: " << std::hex 1791 << ip; 1792 } 1793 } else { 1794 if (first_ip) { 1795 first_ip = false; 1796 // Remove duplication with sample ip. 1797 if (ip == r.ip_data.ip) { 1798 continue; 1799 } 1800 } 1801 map = thread_tree_.FindMap(thread, ip, in_kernel); 1802 dso = map->dso; 1803 if (dump_symbols_) { 1804 symbol = thread_tree_.FindSymbol(map, ip, nullptr, &dso); 1805 if (!symbol->HasDumpId()) { 1806 dso->CreateSymbolDumpId(symbol); 1807 } 1808 } 1809 if (!dso->HasDumpId() && dso->type() != DSO_UNKNOWN_FILE) { 1810 dso->CreateDumpId(); 1811 } 1812 } 1813 } 1814 } 1815 } 1816 1817 void RegisterRecordCommand() { 1818 RegisterCommand("record", 1819 [] { return std::unique_ptr<Command>(new RecordCommand()); }); 1820 } 1821