1 /* 2 * Copyright (C) 2016 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #include "command.h" 18 19 #include <unordered_map> 20 21 #include <android-base/logging.h> 22 #include <android-base/strings.h> 23 24 #include "callchain.h" 25 #include "event_attr.h" 26 #include "event_type.h" 27 #include "record_file.h" 28 #include "sample_tree.h" 29 #include "tracing.h" 30 #include "utils.h" 31 32 namespace { 33 34 struct SlabSample { 35 const Symbol* symbol; // the function making allocation 36 uint64_t ptr; // the start address of the allocated space 37 uint64_t bytes_req; // requested space size 38 uint64_t bytes_alloc; // allocated space size 39 uint64_t sample_count; // count of allocations 40 uint64_t gfp_flags; // flags used for allocation 41 uint64_t cross_cpu_allocations; // count of allocations freed not on the 42 // cpu allocating them 43 CallChainRoot<SlabSample> callchain; // a callchain tree representing all 44 // callchains in this sample 45 SlabSample(const Symbol* symbol, uint64_t ptr, uint64_t bytes_req, 46 uint64_t bytes_alloc, uint64_t sample_count, uint64_t gfp_flags, 47 uint64_t cross_cpu_allocations) 48 : symbol(symbol), 49 ptr(ptr), 50 bytes_req(bytes_req), 51 bytes_alloc(bytes_alloc), 52 sample_count(sample_count), 53 gfp_flags(gfp_flags), 54 cross_cpu_allocations(cross_cpu_allocations) {} 55 56 uint64_t GetPeriod() const { 57 return sample_count; 58 } 59 }; 60 61 struct SlabAccumulateInfo { 62 uint64_t bytes_req; 63 uint64_t bytes_alloc; 64 }; 65 66 BUILD_COMPARE_VALUE_FUNCTION(ComparePtr, ptr); 67 BUILD_COMPARE_VALUE_FUNCTION_REVERSE(CompareBytesReq, bytes_req); 68 BUILD_COMPARE_VALUE_FUNCTION_REVERSE(CompareBytesAlloc, bytes_alloc); 69 BUILD_COMPARE_VALUE_FUNCTION(CompareGfpFlags, gfp_flags); 70 BUILD_COMPARE_VALUE_FUNCTION_REVERSE(CompareCrossCpuAllocations, 71 cross_cpu_allocations); 72 73 BUILD_DISPLAY_HEX64_FUNCTION(DisplayPtr, ptr); 74 BUILD_DISPLAY_UINT64_FUNCTION(DisplayBytesReq, bytes_req); 75 BUILD_DISPLAY_UINT64_FUNCTION(DisplayBytesAlloc, bytes_alloc); 76 BUILD_DISPLAY_HEX64_FUNCTION(DisplayGfpFlags, gfp_flags); 77 BUILD_DISPLAY_UINT64_FUNCTION(DisplayCrossCpuAllocations, 78 cross_cpu_allocations); 79 80 static int CompareFragment(const SlabSample* sample1, 81 const SlabSample* sample2) { 82 uint64_t frag1 = sample1->bytes_alloc - sample1->bytes_req; 83 uint64_t frag2 = sample2->bytes_alloc - sample2->bytes_req; 84 return Compare(frag2, frag1); 85 } 86 87 static std::string DisplayFragment(const SlabSample* sample) { 88 return android::base::StringPrintf("%" PRIu64, 89 sample->bytes_alloc - sample->bytes_req); 90 } 91 92 struct SlabSampleTree { 93 std::vector<SlabSample*> samples; 94 uint64_t total_requested_bytes; 95 uint64_t total_allocated_bytes; 96 uint64_t nr_allocations; 97 uint64_t nr_frees; 98 uint64_t nr_cross_cpu_allocations; 99 }; 100 101 struct SlabFormat { 102 enum { 103 KMEM_ALLOC, 104 KMEM_FREE, 105 } type; 106 TracingFieldPlace call_site; 107 TracingFieldPlace ptr; 108 TracingFieldPlace bytes_req; 109 TracingFieldPlace bytes_alloc; 110 TracingFieldPlace gfp_flags; 111 }; 112 113 class SlabSampleTreeBuilder 114 : public SampleTreeBuilder<SlabSample, SlabAccumulateInfo> { 115 public: 116 SlabSampleTreeBuilder(const SampleComparator<SlabSample>& sample_comparator, 117 ThreadTree* thread_tree) 118 : SampleTreeBuilder(sample_comparator), 119 thread_tree_(thread_tree), 120 total_requested_bytes_(0), 121 total_allocated_bytes_(0), 122 nr_allocations_(0), 123 nr_cross_cpu_allocations_(0) {} 124 125 SlabSampleTree GetSampleTree() const { 126 SlabSampleTree sample_tree; 127 sample_tree.samples = GetSamples(); 128 sample_tree.total_requested_bytes = total_requested_bytes_; 129 sample_tree.total_allocated_bytes = total_allocated_bytes_; 130 sample_tree.nr_allocations = nr_allocations_; 131 sample_tree.nr_frees = nr_frees_; 132 sample_tree.nr_cross_cpu_allocations = nr_cross_cpu_allocations_; 133 return sample_tree; 134 } 135 136 void AddSlabFormat(const std::vector<uint64_t>& event_ids, 137 SlabFormat format) { 138 std::unique_ptr<SlabFormat> p(new SlabFormat(format)); 139 for (auto id : event_ids) { 140 event_id_to_format_map_[id] = p.get(); 141 } 142 formats_.push_back(std::move(p)); 143 } 144 145 protected: 146 SlabSample* CreateSample(const SampleRecord& r, bool in_kernel, 147 SlabAccumulateInfo* acc_info) override { 148 if (!in_kernel) { 149 // Normally we don't parse records in user space because tracepoint 150 // events all happen in kernel. But if r.ip_data.ip == 0, it may be 151 // a kernel record failed to dump ip register and is still useful. 152 if (r.ip_data.ip == 0) { 153 // It seems we are on a kernel can't dump regset for tracepoint events 154 // because of lacking perf_arch_fetch_caller_regs(). We can't get 155 // callchain, but we can still do a normal report. 156 static bool first = true; 157 if (first) { 158 first = false; 159 if (accumulate_callchain_) { 160 // The kernel doesn't seem to support dumping registers for 161 // tracepoint events because of lacking 162 // perf_arch_fetch_caller_regs(). 163 LOG(WARNING) << "simpleperf may not get callchains for tracepoint" 164 << " events because of lacking kernel support."; 165 } 166 } 167 } else { 168 return nullptr; 169 } 170 } 171 uint64_t id = r.id_data.id; 172 auto it = event_id_to_format_map_.find(id); 173 if (it == event_id_to_format_map_.end()) { 174 return nullptr; 175 } 176 const char* raw_data = r.raw_data.data; 177 SlabFormat* format = it->second; 178 if (format->type == SlabFormat::KMEM_ALLOC) { 179 uint64_t call_site = format->call_site.ReadFromData(raw_data); 180 const Symbol* symbol = thread_tree_->FindKernelSymbol(call_site); 181 uint64_t ptr = format->ptr.ReadFromData(raw_data); 182 uint64_t bytes_req = format->bytes_req.ReadFromData(raw_data); 183 uint64_t bytes_alloc = format->bytes_alloc.ReadFromData(raw_data); 184 uint64_t gfp_flags = format->gfp_flags.ReadFromData(raw_data); 185 SlabSample* sample = 186 InsertSample(std::unique_ptr<SlabSample>(new SlabSample( 187 symbol, ptr, bytes_req, bytes_alloc, 1, gfp_flags, 0))); 188 alloc_cpu_record_map_.insert( 189 std::make_pair(ptr, std::make_pair(r.cpu_data.cpu, sample))); 190 acc_info->bytes_req = bytes_req; 191 acc_info->bytes_alloc = bytes_alloc; 192 return sample; 193 } else if (format->type == SlabFormat::KMEM_FREE) { 194 uint64_t ptr = format->ptr.ReadFromData(raw_data); 195 auto it = alloc_cpu_record_map_.find(ptr); 196 if (it != alloc_cpu_record_map_.end()) { 197 SlabSample* sample = it->second.second; 198 if (r.cpu_data.cpu != it->second.first) { 199 sample->cross_cpu_allocations++; 200 nr_cross_cpu_allocations_++; 201 } 202 alloc_cpu_record_map_.erase(it); 203 } 204 nr_frees_++; 205 } 206 return nullptr; 207 } 208 209 SlabSample* CreateBranchSample(const SampleRecord&, 210 const BranchStackItemType&) override { 211 return nullptr; 212 } 213 214 SlabSample* CreateCallChainSample(const ThreadEntry*, 215 const SlabSample* sample, uint64_t ip, bool in_kernel, 216 const std::vector<SlabSample*>& callchain, 217 const SlabAccumulateInfo& acc_info) override { 218 if (!in_kernel) { 219 return nullptr; 220 } 221 const Symbol* symbol = thread_tree_->FindKernelSymbol(ip); 222 return InsertCallChainSample( 223 std::unique_ptr<SlabSample>( 224 new SlabSample(symbol, sample->ptr, acc_info.bytes_req, 225 acc_info.bytes_alloc, 1, sample->gfp_flags, 0)), 226 callchain); 227 } 228 229 const ThreadEntry* GetThreadOfSample(SlabSample*) override { return nullptr; } 230 231 uint64_t GetPeriodForCallChain(const SlabAccumulateInfo&) override { 232 // Decide the percentage of callchain by the sample_count, so use 1 as the 233 // period when calling AddCallChain(). 234 return 1; 235 } 236 237 void UpdateSummary(const SlabSample* sample) override { 238 total_requested_bytes_ += sample->bytes_req; 239 total_allocated_bytes_ += sample->bytes_alloc; 240 nr_allocations_++; 241 } 242 243 void MergeSample(SlabSample* sample1, SlabSample* sample2) override { 244 sample1->bytes_req += sample2->bytes_req; 245 sample1->bytes_alloc += sample2->bytes_alloc; 246 sample1->sample_count += sample2->sample_count; 247 } 248 249 private: 250 ThreadTree* thread_tree_; 251 uint64_t total_requested_bytes_; 252 uint64_t total_allocated_bytes_; 253 uint64_t nr_allocations_; 254 uint64_t nr_frees_; 255 uint64_t nr_cross_cpu_allocations_; 256 257 std::unordered_map<uint64_t, SlabFormat*> event_id_to_format_map_; 258 std::vector<std::unique_ptr<SlabFormat>> formats_; 259 std::unordered_map<uint64_t, std::pair<uint32_t, SlabSample*>> 260 alloc_cpu_record_map_; 261 }; 262 263 using SlabSampleTreeSorter = SampleTreeSorter<SlabSample>; 264 using SlabSampleTreeDisplayer = SampleTreeDisplayer<SlabSample, SlabSampleTree>; 265 using SlabSampleCallgraphDisplayer = 266 CallgraphDisplayer<SlabSample, CallChainNode<SlabSample>>; 267 268 struct EventAttrWithName { 269 perf_event_attr attr; 270 std::string name; 271 std::vector<uint64_t> event_ids; 272 }; 273 274 class KmemCommand : public Command { 275 public: 276 KmemCommand() 277 : Command( 278 "kmem", "collect kernel memory allocation information", 279 // clang-format off 280 "Usage: kmem (record [record options] | report [report options])\n" 281 "kmem record\n" 282 "-g Enable call graph recording. Same as '--call-graph fp'.\n" 283 "--slab Collect slab allocation information. Default option.\n" 284 "Other record options provided by simpleperf record command are also available.\n" 285 "kmem report\n" 286 "--children Print the accumulated allocation info appeared in the callchain.\n" 287 " Can be used on perf.data recorded with `--call-graph fp` option.\n" 288 "-g [callee|caller] Print call graph for perf.data recorded with\n" 289 " `--call-graph fp` option. If callee mode is used, the graph\n" 290 " shows how functions are called from others. Otherwise, the\n" 291 " graph shows how functions call others. Default is callee\n" 292 " mode. The percentage shown in the graph is determined by\n" 293 " the hit count of the callchain.\n" 294 "-i Specify path of record file, default is perf.data\n" 295 "-o report_file_name Set report file name, default is stdout.\n" 296 "--slab Report slab allocation information. Default option.\n" 297 "--slab-sort key1,key2,...\n" 298 " Select the keys to sort and print slab allocation information.\n" 299 " Should be used with --slab option. Possible keys include:\n" 300 " hit -- the allocation count.\n" 301 " caller -- the function calling allocation.\n" 302 " ptr -- the address of the allocated space.\n" 303 " bytes_req -- the total requested space size.\n" 304 " bytes_alloc -- the total allocated space size.\n" 305 " fragment -- the extra allocated space size\n" 306 " (bytes_alloc - bytes_req).\n" 307 " gfp_flags -- the flags used for allocation.\n" 308 " pingpong -- the count of allocations that are freed not on\n" 309 " the cpu allocating them.\n" 310 " The default slab sort keys are:\n" 311 " hit,caller,bytes_req,bytes_alloc,fragment,pingpong.\n" 312 // clang-format on 313 ), 314 is_record_(false), 315 use_slab_(false), 316 accumulate_callchain_(false), 317 print_callgraph_(false), 318 callgraph_show_callee_(false), 319 record_filename_("perf.data"), 320 record_file_arch_(GetBuildArch()) {} 321 322 bool Run(const std::vector<std::string>& args); 323 324 private: 325 bool ParseOptions(const std::vector<std::string>& args, 326 std::vector<std::string>* left_args); 327 bool RecordKmemInfo(const std::vector<std::string>& record_args); 328 bool ReportKmemInfo(); 329 bool PrepareToBuildSampleTree(); 330 void ReadEventAttrsFromRecordFile(); 331 bool ReadFeaturesFromRecordFile(); 332 bool ReadSampleTreeFromRecordFile(); 333 bool ProcessRecord(std::unique_ptr<Record> record); 334 void ProcessTracingData(const std::vector<char>& data); 335 bool PrintReport(); 336 void PrintReportContext(FILE* fp); 337 void PrintSlabReportContext(FILE* fp); 338 339 bool is_record_; 340 bool use_slab_; 341 std::vector<std::string> slab_sort_keys_; 342 bool accumulate_callchain_; 343 bool print_callgraph_; 344 bool callgraph_show_callee_; 345 346 std::string record_filename_; 347 std::unique_ptr<RecordFileReader> record_file_reader_; 348 std::vector<EventAttrWithName> event_attrs_; 349 std::string record_cmdline_; 350 ArchType record_file_arch_; 351 352 ThreadTree thread_tree_; 353 SlabSampleTree slab_sample_tree_; 354 std::unique_ptr<SlabSampleTreeBuilder> slab_sample_tree_builder_; 355 std::unique_ptr<SlabSampleTreeSorter> slab_sample_tree_sorter_; 356 std::unique_ptr<SlabSampleTreeDisplayer> slab_sample_tree_displayer_; 357 358 std::string report_filename_; 359 }; 360 361 bool KmemCommand::Run(const std::vector<std::string>& args) { 362 std::vector<std::string> left_args; 363 if (!ParseOptions(args, &left_args)) { 364 return false; 365 } 366 if (!use_slab_) { 367 use_slab_ = true; 368 } 369 if (is_record_) { 370 return RecordKmemInfo(left_args); 371 } 372 return ReportKmemInfo(); 373 } 374 375 bool KmemCommand::ParseOptions(const std::vector<std::string>& args, 376 std::vector<std::string>* left_args) { 377 if (args.empty()) { 378 LOG(ERROR) << "No subcommand specified"; 379 return false; 380 } 381 if (args[0] == "record") { 382 if (!IsRoot()) { 383 LOG(ERROR) << "simpleperf kmem record command needs root privilege"; 384 return false; 385 } 386 is_record_ = true; 387 size_t i; 388 for (i = 1; i < args.size() && !args[i].empty() && args[i][0] == '-'; ++i) { 389 if (args[i] == "-g") { 390 left_args->push_back("--call-graph"); 391 left_args->push_back("fp"); 392 } else if (args[i] == "--slab") { 393 use_slab_ = true; 394 } else { 395 left_args->push_back(args[i]); 396 } 397 } 398 left_args->insert(left_args->end(), args.begin() + i, args.end()); 399 } else if (args[0] == "report") { 400 is_record_ = false; 401 for (size_t i = 1; i < args.size(); ++i) { 402 if (args[i] == "--children") { 403 accumulate_callchain_ = true; 404 } else if (args[i] == "-g") { 405 print_callgraph_ = true; 406 accumulate_callchain_ = true; 407 callgraph_show_callee_ = true; 408 if (i + 1 < args.size() && args[i + 1][0] != '-') { 409 ++i; 410 if (args[i] == "callee") { 411 callgraph_show_callee_ = true; 412 } else if (args[i] == "caller") { 413 callgraph_show_callee_ = false; 414 } else { 415 LOG(ERROR) << "Unknown argument with -g option: " << args[i]; 416 return false; 417 } 418 } 419 } else if (args[i] == "-i") { 420 if (!NextArgumentOrError(args, &i)) { 421 return false; 422 } 423 record_filename_ = args[i]; 424 } else if (args[i] == "-o") { 425 if (!NextArgumentOrError(args, &i)) { 426 return false; 427 } 428 report_filename_ = args[i]; 429 } else if (args[i] == "--slab") { 430 use_slab_ = true; 431 } else if (args[i] == "--slab-sort") { 432 if (!NextArgumentOrError(args, &i)) { 433 return false; 434 } 435 slab_sort_keys_ = android::base::Split(args[i], ","); 436 } else { 437 ReportUnknownOption(args, i); 438 return false; 439 } 440 } 441 } else { 442 LOG(ERROR) << "Unknown subcommand for " << Name() << ": " << args[0] 443 << ". Try `simpleperf help " << Name() << "`"; 444 return false; 445 } 446 return true; 447 } 448 449 bool KmemCommand::RecordKmemInfo(const std::vector<std::string>& record_args) { 450 std::vector<std::string> args; 451 if (use_slab_) { 452 std::vector<std::string> trace_events = { 453 "kmem:kmalloc", "kmem:kmem_cache_alloc", 454 "kmem:kmalloc_node", "kmem:kmem_cache_alloc_node", 455 "kmem:kfree", "kmem:kmem_cache_free"}; 456 for (const auto& name : trace_events) { 457 if (ParseEventType(name)) { 458 args.insert(args.end(), {"-e", name}); 459 } 460 } 461 } 462 if (args.empty()) { 463 LOG(ERROR) << "Kernel allocation related trace events are not supported."; 464 return false; 465 } 466 args.push_back("-a"); 467 args.insert(args.end(), record_args.begin(), record_args.end()); 468 std::unique_ptr<Command> record_cmd = CreateCommandInstance("record"); 469 if (record_cmd == nullptr) { 470 LOG(ERROR) << "record command isn't available"; 471 return false; 472 } 473 return record_cmd->Run(args); 474 } 475 476 bool KmemCommand::ReportKmemInfo() { 477 if (!PrepareToBuildSampleTree()) { 478 return false; 479 } 480 record_file_reader_ = RecordFileReader::CreateInstance(record_filename_); 481 if (record_file_reader_ == nullptr) { 482 return false; 483 } 484 ReadEventAttrsFromRecordFile(); 485 if (!ReadFeaturesFromRecordFile()) { 486 return false; 487 } 488 if (!ReadSampleTreeFromRecordFile()) { 489 return false; 490 } 491 if (!PrintReport()) { 492 return false; 493 } 494 return true; 495 } 496 497 bool KmemCommand::PrepareToBuildSampleTree() { 498 if (use_slab_) { 499 if (slab_sort_keys_.empty()) { 500 slab_sort_keys_ = {"hit", "caller", "bytes_req", 501 "bytes_alloc", "fragment", "pingpong"}; 502 } 503 SampleComparator<SlabSample> comparator; 504 SampleComparator<SlabSample> sort_comparator; 505 SampleDisplayer<SlabSample, SlabSampleTree> displayer; 506 std::string accumulated_name = accumulate_callchain_ ? "Accumulated_" : ""; 507 508 if (print_callgraph_) { 509 displayer.AddExclusiveDisplayFunction(SlabSampleCallgraphDisplayer()); 510 } 511 512 for (const auto& key : slab_sort_keys_) { 513 if (key == "hit") { 514 sort_comparator.AddCompareFunction(CompareSampleCount); 515 displayer.AddDisplayFunction(accumulated_name + "Hit", 516 DisplaySampleCount); 517 } else if (key == "caller") { 518 comparator.AddCompareFunction(CompareSymbol); 519 displayer.AddDisplayFunction("Caller", DisplaySymbol); 520 } else if (key == "ptr") { 521 comparator.AddCompareFunction(ComparePtr); 522 displayer.AddDisplayFunction("Ptr", DisplayPtr); 523 } else if (key == "bytes_req") { 524 sort_comparator.AddCompareFunction(CompareBytesReq); 525 displayer.AddDisplayFunction(accumulated_name + "BytesReq", 526 DisplayBytesReq); 527 } else if (key == "bytes_alloc") { 528 sort_comparator.AddCompareFunction(CompareBytesAlloc); 529 displayer.AddDisplayFunction(accumulated_name + "BytesAlloc", 530 DisplayBytesAlloc); 531 } else if (key == "fragment") { 532 sort_comparator.AddCompareFunction(CompareFragment); 533 displayer.AddDisplayFunction(accumulated_name + "Fragment", 534 DisplayFragment); 535 } else if (key == "gfp_flags") { 536 comparator.AddCompareFunction(CompareGfpFlags); 537 displayer.AddDisplayFunction("GfpFlags", DisplayGfpFlags); 538 } else if (key == "pingpong") { 539 sort_comparator.AddCompareFunction(CompareCrossCpuAllocations); 540 displayer.AddDisplayFunction("Pingpong", DisplayCrossCpuAllocations); 541 } else { 542 LOG(ERROR) << "Unknown sort key for slab allocation: " << key; 543 return false; 544 } 545 slab_sample_tree_builder_.reset( 546 new SlabSampleTreeBuilder(comparator, &thread_tree_)); 547 slab_sample_tree_builder_->SetCallChainSampleOptions( 548 accumulate_callchain_, print_callgraph_, !callgraph_show_callee_); 549 sort_comparator.AddComparator(comparator); 550 slab_sample_tree_sorter_.reset(new SlabSampleTreeSorter(sort_comparator)); 551 slab_sample_tree_displayer_.reset(new SlabSampleTreeDisplayer(displayer)); 552 } 553 } 554 return true; 555 } 556 557 void KmemCommand::ReadEventAttrsFromRecordFile() { 558 std::vector<EventAttrWithId> attrs = record_file_reader_->AttrSection(); 559 for (const auto& attr_with_id : attrs) { 560 EventAttrWithName attr; 561 attr.attr = *attr_with_id.attr; 562 attr.event_ids = attr_with_id.ids; 563 attr.name = GetEventNameByAttr(attr.attr); 564 event_attrs_.push_back(attr); 565 } 566 } 567 568 bool KmemCommand::ReadFeaturesFromRecordFile() { 569 record_file_reader_->LoadBuildIdAndFileFeatures(thread_tree_); 570 std::string arch = 571 record_file_reader_->ReadFeatureString(PerfFileFormat::FEAT_ARCH); 572 if (!arch.empty()) { 573 record_file_arch_ = GetArchType(arch); 574 if (record_file_arch_ == ARCH_UNSUPPORTED) { 575 return false; 576 } 577 } 578 std::vector<std::string> cmdline = record_file_reader_->ReadCmdlineFeature(); 579 if (!cmdline.empty()) { 580 record_cmdline_ = android::base::Join(cmdline, ' '); 581 } 582 if (record_file_reader_->HasFeature(PerfFileFormat::FEAT_TRACING_DATA)) { 583 std::vector<char> tracing_data; 584 if (!record_file_reader_->ReadFeatureSection( 585 PerfFileFormat::FEAT_TRACING_DATA, &tracing_data)) { 586 return false; 587 } 588 ProcessTracingData(tracing_data); 589 } 590 return true; 591 } 592 593 bool KmemCommand::ReadSampleTreeFromRecordFile() { 594 if (!record_file_reader_->ReadDataSection( 595 [this](std::unique_ptr<Record> record) { 596 return ProcessRecord(std::move(record)); 597 })) { 598 return false; 599 } 600 if (use_slab_) { 601 slab_sample_tree_ = slab_sample_tree_builder_->GetSampleTree(); 602 slab_sample_tree_sorter_->Sort(slab_sample_tree_.samples, print_callgraph_); 603 } 604 return true; 605 } 606 607 bool KmemCommand::ProcessRecord(std::unique_ptr<Record> record) { 608 thread_tree_.Update(*record); 609 if (record->type() == PERF_RECORD_SAMPLE) { 610 if (use_slab_) { 611 slab_sample_tree_builder_->ProcessSampleRecord( 612 *static_cast<const SampleRecord*>(record.get())); 613 } 614 } else if (record->type() == PERF_RECORD_TRACING_DATA || 615 record->type() == SIMPLE_PERF_RECORD_TRACING_DATA) { 616 const auto& r = *static_cast<TracingDataRecord*>(record.get()); 617 ProcessTracingData(std::vector<char>(r.data, r.data + r.data_size)); 618 } 619 return true; 620 } 621 622 void KmemCommand::ProcessTracingData(const std::vector<char>& data) { 623 Tracing tracing(data); 624 for (auto& attr : event_attrs_) { 625 if (attr.attr.type == PERF_TYPE_TRACEPOINT) { 626 uint64_t trace_event_id = attr.attr.config; 627 attr.name = tracing.GetTracingEventNameHavingId(trace_event_id); 628 TracingFormat format = tracing.GetTracingFormatHavingId(trace_event_id); 629 if (use_slab_) { 630 if (format.name == "kmalloc" || format.name == "kmem_cache_alloc" || 631 format.name == "kmalloc_node" || 632 format.name == "kmem_cache_alloc_node") { 633 SlabFormat f; 634 f.type = SlabFormat::KMEM_ALLOC; 635 format.GetField("call_site", f.call_site); 636 format.GetField("ptr", f.ptr); 637 format.GetField("bytes_req", f.bytes_req); 638 format.GetField("bytes_alloc", f.bytes_alloc); 639 format.GetField("gfp_flags", f.gfp_flags); 640 slab_sample_tree_builder_->AddSlabFormat(attr.event_ids, f); 641 } else if (format.name == "kfree" || format.name == "kmem_cache_free") { 642 SlabFormat f; 643 f.type = SlabFormat::KMEM_FREE; 644 format.GetField("call_site", f.call_site); 645 format.GetField("ptr", f.ptr); 646 slab_sample_tree_builder_->AddSlabFormat(attr.event_ids, f); 647 } 648 } 649 } 650 } 651 } 652 653 bool KmemCommand::PrintReport() { 654 std::unique_ptr<FILE, decltype(&fclose)> file_handler(nullptr, fclose); 655 FILE* report_fp = stdout; 656 if (!report_filename_.empty()) { 657 file_handler.reset(fopen(report_filename_.c_str(), "w")); 658 if (file_handler == nullptr) { 659 PLOG(ERROR) << "failed to open " << report_filename_; 660 return false; 661 } 662 report_fp = file_handler.get(); 663 } 664 PrintReportContext(report_fp); 665 if (use_slab_) { 666 fprintf(report_fp, "\n\n"); 667 PrintSlabReportContext(report_fp); 668 slab_sample_tree_displayer_->DisplaySamples( 669 report_fp, slab_sample_tree_.samples, &slab_sample_tree_); 670 } 671 return true; 672 } 673 674 void KmemCommand::PrintReportContext(FILE* fp) { 675 if (!record_cmdline_.empty()) { 676 fprintf(fp, "Cmdline: %s\n", record_cmdline_.c_str()); 677 } 678 fprintf(fp, "Arch: %s\n", GetArchString(record_file_arch_).c_str()); 679 for (const auto& attr : event_attrs_) { 680 fprintf(fp, "Event: %s (type %u, config %llu)\n", attr.name.c_str(), 681 attr.attr.type, attr.attr.config); 682 } 683 } 684 685 void KmemCommand::PrintSlabReportContext(FILE* fp) { 686 fprintf(fp, "Slab allocation information:\n"); 687 fprintf(fp, "Total requested bytes: %" PRIu64 "\n", 688 slab_sample_tree_.total_requested_bytes); 689 fprintf(fp, "Total allocated bytes: %" PRIu64 "\n", 690 slab_sample_tree_.total_allocated_bytes); 691 uint64_t fragment = slab_sample_tree_.total_allocated_bytes - 692 slab_sample_tree_.total_requested_bytes; 693 double percentage = 0.0; 694 if (slab_sample_tree_.total_allocated_bytes != 0) { 695 percentage = 100.0 * fragment / slab_sample_tree_.total_allocated_bytes; 696 } 697 fprintf(fp, "Total fragment: %" PRIu64 ", %f%%\n", fragment, percentage); 698 fprintf(fp, "Total allocations: %" PRIu64 "\n", 699 slab_sample_tree_.nr_allocations); 700 fprintf(fp, "Total frees: %" PRIu64 "\n", slab_sample_tree_.nr_frees); 701 percentage = 0.0; 702 if (slab_sample_tree_.nr_allocations != 0) { 703 percentage = 100.0 * slab_sample_tree_.nr_cross_cpu_allocations / 704 slab_sample_tree_.nr_allocations; 705 } 706 fprintf(fp, "Total cross cpu allocation/free: %" PRIu64 ", %f%%\n", 707 slab_sample_tree_.nr_cross_cpu_allocations, percentage); 708 fprintf(fp, "\n"); 709 } 710 711 } // namespace 712 713 void RegisterKmemCommand() { 714 RegisterCommand("kmem", 715 [] { return std::unique_ptr<Command>(new KmemCommand()); }); 716 } 717