1 /* 2 * Copyright (C) 2015 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #include <inttypes.h> 18 #include <signal.h> 19 #include <stdio.h> 20 #include <string.h> 21 #include <sys/prctl.h> 22 23 #include <algorithm> 24 #include <chrono> 25 #include <set> 26 #include <string> 27 #include <string_view> 28 #include <vector> 29 30 #include <android-base/file.h> 31 #include <android-base/logging.h> 32 #include <android-base/strings.h> 33 #include <android-base/unique_fd.h> 34 35 #include "cmd_stat_impl.h" 36 #include "command.h" 37 #include "environment.h" 38 #include "event_attr.h" 39 #include "event_fd.h" 40 #include "event_selection_set.h" 41 #include "event_type.h" 42 #include "IOEventLoop.h" 43 #include "utils.h" 44 #include "workload.h" 45 46 using namespace simpleperf; 47 48 namespace simpleperf { 49 50 static std::vector<std::string> default_measured_event_types{ 51 "cpu-cycles", "stalled-cycles-frontend", "stalled-cycles-backend", 52 "instructions", "branch-instructions", "branch-misses", 53 "task-clock", "context-switches", "page-faults", 54 }; 55 56 static const std::unordered_map<std::string_view, std::pair<std::string_view, std::string_view>> 57 COMMON_EVENT_RATE_MAP = { 58 {"cache-misses", {"cache-references", "miss rate"}}, 59 {"branch-misses", {"branch-instructions", "miss rate"}}, 60 }; 61 62 static const std::unordered_map<std::string_view, std::pair<std::string_view, std::string_view>> 63 ARM_EVENT_RATE_MAP = { 64 // Refer to "D6.10.5 Meaningful ratios between common microarchitectural events" in ARMv8 65 // specification. 66 {"raw-l1i-cache-refill", {"raw-l1i-cache", "level 1 instruction cache refill rate"}}, 67 {"raw-l1i-tlb-refill", {"raw-l1i-tlb", "level 1 instruction TLB refill rate"}}, 68 {"raw-l1d-cache-refill", {"raw-l1d-cache", "level 1 data or unified cache refill rate"}}, 69 {"raw-l1d-tlb-refill", {"raw-l1d-tlb", "level 1 data or unified TLB refill rate"}}, 70 {"raw-l2d-cache-refill", {"raw-l2d-cache", "level 2 data or unified cache refill rate"}}, 71 {"raw-l2i-cache-refill", {"raw-l2i-cache", "level 2 instruction cache refill rate"}}, 72 {"raw-l3d-cache-refill", {"raw-l3d-cache", "level 3 data or unified cache refill rate"}}, 73 {"raw-l2d-tlb-refill", {"raw-l2d-tlb", "level 2 data or unified TLB refill rate"}}, 74 {"raw-l2i-tlb-refill", {"raw-l2i-tlb", "level 2 instruction TLB refill rate"}}, 75 {"raw-bus-access", {"raw-bus-cycles", "bus accesses per cycle"}}, 76 {"raw-ll-cache-miss", {"raw-ll-cache", "last level data or unified cache refill rate"}}, 77 {"raw-dtlb-walk", {"raw-l1d-tlb", "data TLB miss rate"}}, 78 {"raw-itlb-walk", {"raw-l1i-tlb", "instruction TLB miss rate"}}, 79 {"raw-ll-cache-miss-rd", {"raw-ll-cache-rd", "memory read operation miss rate"}}, 80 {"raw-remote-access-rd", 81 {"raw-remote-access", "read accesses to another socket in a multi-socket system"}}, 82 // Refer to "Table K3-2 Relationship between REFILL events and associated access events" in 83 // ARMv8 specification. 84 {"raw-l1d-cache-refill-rd", {"raw-l1d-cache-rd", "level 1 cache refill rate, read"}}, 85 {"raw-l1d-cache-refill-wr", {"raw-l1d-cache-wr", "level 1 cache refill rate, write"}}, 86 {"raw-l1d-tlb-refill-rd", {"raw-l1d-tlb-rd", "level 1 TLB refill rate, read"}}, 87 {"raw-l1d-tlb-refill-wr", {"raw-l1d-tlb-wr", "level 1 TLB refill rate, write"}}, 88 {"raw-l2d-cache-refill-rd", {"raw-l2d-cache-rd", "level 2 data cache refill rate, read"}}, 89 {"raw-l2d-cache-refill-wr", {"raw-l2d-cache-wr", "level 2 data cache refill rate, write"}}, 90 {"raw-l2d-tlb-refill-rd", {"raw-l2d-tlb-rd", "level 2 data TLB refill rate, read"}}, 91 }; 92 93 const CounterSummary* CounterSummaries::FindSummary(const std::string& type_name, 94 const std::string& modifier, 95 const ThreadInfo* thread, int cpu) { 96 for (const auto& s : summaries_) { 97 if (s.type_name == type_name && s.modifier == modifier && s.thread == thread && s.cpu == cpu) { 98 return &s; 99 } 100 } 101 return nullptr; 102 } 103 104 void CounterSummaries::AutoGenerateSummaries() { 105 for (size_t i = 0; i < summaries_.size(); ++i) { 106 const CounterSummary& s = summaries_[i]; 107 if (s.modifier == "u") { 108 const CounterSummary* other = FindSummary(s.type_name, "k", s.thread, s.cpu); 109 if (other != nullptr && other->IsMonitoredAtTheSameTime(s)) { 110 if (FindSummary(s.type_name, "", s.thread, s.cpu) == nullptr) { 111 summaries_.emplace_back(s.type_name, "", s.group_id, s.thread, s.cpu, 112 s.count + other->count, s.runtime_in_ns, s.scale, true, csv_); 113 } 114 } 115 } 116 } 117 } 118 119 void CounterSummaries::GenerateComments(double duration_in_sec) { 120 for (auto& s : summaries_) { 121 s.comment = GetCommentForSummary(s, duration_in_sec); 122 } 123 } 124 125 void CounterSummaries::Show(FILE* fp) { 126 if (csv_) { 127 ShowCSV(fp); 128 } else { 129 ShowText(fp); 130 } 131 } 132 133 void CounterSummaries::ShowCSV(FILE* fp) { 134 for (auto& s : summaries_) { 135 if (s.thread != nullptr) { 136 fprintf(fp, "%s,%d,%d,", s.thread->name.c_str(), s.thread->pid, s.thread->tid); 137 } 138 fprintf(fp, "%s,%s,%s,(%.0f%%)%s\n", s.readable_count.c_str(), s.Name().c_str(), 139 s.comment.c_str(), 1.0 / s.scale * 100, (s.auto_generated ? " (generated)," : ",")); 140 } 141 } 142 143 void CounterSummaries::ShowText(FILE* fp) { 144 bool show_thread = !summaries_.empty() && summaries_[0].thread != nullptr; 145 bool show_cpu = !summaries_.empty() && summaries_[0].cpu != -1; 146 std::vector<std::string> titles; 147 148 if (show_thread) { 149 titles = {"thread_name", "pid", "tid"}; 150 } 151 if (show_cpu) { 152 titles.emplace_back("cpu"); 153 } 154 titles.emplace_back("count"); 155 titles.emplace_back("event_name"); 156 titles.emplace_back(" # count / runtime, runtime / enabled_time"); 157 158 std::vector<size_t> width(titles.size(), 0); 159 160 auto adjust_width = [](size_t& w, size_t size) { w = std::max(w, size); }; 161 162 // The last title is too long. Don't include it for width adjustment. 163 for (size_t i = 0; i + 1 < titles.size(); i++) { 164 adjust_width(width[i], titles[i].size()); 165 } 166 167 for (auto& s : summaries_) { 168 size_t i = 0; 169 if (show_thread) { 170 adjust_width(width[i++], s.thread->name.size()); 171 adjust_width(width[i++], std::to_string(s.thread->pid).size()); 172 adjust_width(width[i++], std::to_string(s.thread->tid).size()); 173 } 174 if (show_cpu) { 175 adjust_width(width[i++], std::to_string(s.cpu).size()); 176 } 177 adjust_width(width[i++], s.readable_count.size()); 178 adjust_width(width[i++], s.Name().size()); 179 adjust_width(width[i++], s.comment.size()); 180 } 181 182 fprintf(fp, "# "); 183 for (size_t i = 0; i < titles.size(); i++) { 184 if (titles[i] == "count") { 185 fprintf(fp, "%*s", static_cast<int>(width[i]), titles[i].c_str()); 186 } else { 187 fprintf(fp, "%-*s", static_cast<int>(width[i]), titles[i].c_str()); 188 } 189 if (i + 1 < titles.size()) { 190 fprintf(fp, " "); 191 } 192 } 193 fprintf(fp, "\n"); 194 195 for (auto& s : summaries_) { 196 size_t i = 0; 197 if (show_thread) { 198 fprintf(fp, " %-*s", static_cast<int>(width[i++]), s.thread->name.c_str()); 199 fprintf(fp, " %-*d", static_cast<int>(width[i++]), s.thread->pid); 200 fprintf(fp, " %-*d", static_cast<int>(width[i++]), s.thread->tid); 201 } 202 if (show_cpu) { 203 fprintf(fp, " %-*d", static_cast<int>(width[i++]), s.cpu); 204 } 205 fprintf(fp, " %*s %-*s # %-*s (%.0f%%)%s\n", static_cast<int>(width[i]), 206 s.readable_count.c_str(), static_cast<int>(width[i + 1]), s.Name().c_str(), 207 static_cast<int>(width[i + 2]), s.comment.c_str(), 1.0 / s.scale * 100, 208 (s.auto_generated ? " (generated)" : "")); 209 } 210 } 211 212 std::string CounterSummaries::GetCommentForSummary(const CounterSummary& s, 213 double duration_in_sec) { 214 char sap_mid; 215 if (csv_) { 216 sap_mid = ','; 217 } else { 218 sap_mid = ' '; 219 } 220 if (s.type_name == "task-clock") { 221 double run_sec = s.count / 1e9; 222 double used_cpus = run_sec / duration_in_sec; 223 return android::base::StringPrintf("%f%ccpus used", used_cpus, sap_mid); 224 } 225 if (s.type_name == "cpu-clock") { 226 return ""; 227 } 228 if (s.type_name == "cpu-cycles") { 229 if (s.runtime_in_ns == 0) { 230 return ""; 231 } 232 double ghz = static_cast<double>(s.count) / s.runtime_in_ns; 233 return android::base::StringPrintf("%f%cGHz", ghz, sap_mid); 234 } 235 if (s.type_name == "instructions" && s.count != 0) { 236 const CounterSummary* other = FindSummary("cpu-cycles", s.modifier, s.thread, s.cpu); 237 if (other != nullptr && other->IsMonitoredAtTheSameTime(s)) { 238 double cpi = static_cast<double>(other->count) / s.count; 239 return android::base::StringPrintf("%f%ccycles per instruction", cpi, sap_mid); 240 } 241 } 242 std::string rate_comment = GetRateComment(s, sap_mid); 243 if (!rate_comment.empty()) { 244 return rate_comment; 245 } 246 if (s.runtime_in_ns == 0) { 247 return ""; 248 } 249 double runtime_in_sec = static_cast<double>(s.runtime_in_ns) / 1e9; 250 double rate = s.count / runtime_in_sec; 251 if (rate >= 1e9 - 1e5) { 252 return android::base::StringPrintf("%.3f%cG/sec", rate / 1e9, sap_mid); 253 } 254 if (rate >= 1e6 - 1e2) { 255 return android::base::StringPrintf("%.3f%cM/sec", rate / 1e6, sap_mid); 256 } 257 if (rate >= 1e3) { 258 return android::base::StringPrintf("%.3f%cK/sec", rate / 1e3, sap_mid); 259 } 260 return android::base::StringPrintf("%.3f%c/sec", rate, sap_mid); 261 } 262 263 std::string CounterSummaries::GetRateComment(const CounterSummary& s, char sep) { 264 std::string_view miss_event_name = s.type_name; 265 std::string event_name; 266 std::string rate_desc; 267 if (auto it = COMMON_EVENT_RATE_MAP.find(miss_event_name); it != COMMON_EVENT_RATE_MAP.end()) { 268 event_name = it->second.first; 269 rate_desc = it->second.second; 270 } 271 if (event_name.empty() && (GetBuildArch() == ARCH_ARM || GetBuildArch() == ARCH_ARM64)) { 272 if (auto it = ARM_EVENT_RATE_MAP.find(miss_event_name); it != ARM_EVENT_RATE_MAP.end()) { 273 event_name = it->second.first; 274 rate_desc = it->second.second; 275 } 276 } 277 if (event_name.empty() && android::base::ConsumeSuffix(&miss_event_name, "-misses")) { 278 event_name = std::string(miss_event_name) + "s"; 279 rate_desc = "miss rate"; 280 } 281 if (!event_name.empty()) { 282 const CounterSummary* other = FindSummary(event_name, s.modifier, s.thread, s.cpu); 283 if (other != nullptr && other->IsMonitoredAtTheSameTime(s) && other->count != 0) { 284 double miss_rate = static_cast<double>(s.count) / other->count; 285 return android::base::StringPrintf("%f%%%c%s", miss_rate * 100, sep, rate_desc.c_str()); 286 } 287 } 288 return ""; 289 } 290 291 } // namespace simpleperf 292 293 namespace { 294 295 // devfreq may use performance counters to calculate memory latency (as in 296 // drivers/devfreq/arm-memlat-mon.c). Hopefully we can get more available counters by asking devfreq 297 // to not use the memory latency governor temporarily. 298 class DevfreqCounters { 299 public: 300 bool Use() { 301 if (!IsRoot()) { 302 LOG(ERROR) << "--use-devfreq-counters needs root permission to set devfreq governors"; 303 return false; 304 } 305 std::string devfreq_dir = "/sys/class/devfreq/"; 306 for (auto& name : GetSubDirs(devfreq_dir)) { 307 std::string governor_path = devfreq_dir + name + "/governor"; 308 if (IsRegularFile(governor_path)) { 309 std::string governor; 310 if (!android::base::ReadFileToString(governor_path, &governor)) { 311 LOG(ERROR) << "failed to read " << governor_path; 312 return false; 313 } 314 governor = android::base::Trim(governor); 315 if (governor == "mem_latency") { 316 if (!android::base::WriteStringToFile("performance", governor_path)) { 317 PLOG(ERROR) << "failed to write " << governor_path; 318 return false; 319 } 320 mem_latency_governor_paths_.emplace_back(std::move(governor_path)); 321 } 322 } 323 } 324 return true; 325 } 326 327 ~DevfreqCounters() { 328 for (auto& path : mem_latency_governor_paths_) { 329 android::base::WriteStringToFile("mem_latency", path); 330 } 331 } 332 333 private: 334 std::vector<std::string> mem_latency_governor_paths_; 335 }; 336 337 class StatCommand : public Command { 338 public: 339 StatCommand() 340 : Command("stat", "gather performance counter information", 341 // clang-format off 342 "Usage: simpleperf stat [options] [command [command-args]]\n" 343 " Gather performance counter information of running [command].\n" 344 " And -a/-p/-t option can be used to change target of counter information.\n" 345 "-a Collect system-wide information.\n" 346 #if defined(__ANDROID__) 347 "--app package_name Profile the process of an Android application.\n" 348 " On non-rooted devices, the app must be debuggable,\n" 349 " because we use run-as to switch to the app's context.\n" 350 #endif 351 "--cpu cpu_item1,cpu_item2,...\n" 352 " Collect information only on the selected cpus. cpu_item can\n" 353 " be a cpu number like 1, or a cpu range like 0-3.\n" 354 "--csv Write report in comma separate form.\n" 355 "--duration time_in_sec Monitor for time_in_sec seconds instead of running\n" 356 " [command]. Here time_in_sec may be any positive\n" 357 " floating point number.\n" 358 "--interval time_in_ms Print stat for every time_in_ms milliseconds.\n" 359 " Here time_in_ms may be any positive floating point\n" 360 " number. Simpleperf prints total values from the\n" 361 " starting point. But this can be changed by\n" 362 " --interval-only-values.\n" 363 "--interval-only-values Print numbers of events happened in each interval.\n" 364 "-e event1[:modifier1],event2[:modifier2],...\n" 365 " Select a list of events to count. An event can be:\n" 366 " 1) an event name listed in `simpleperf list`;\n" 367 " 2) a raw PMU event in rN format. N is a hex number.\n" 368 " For example, r1b selects event number 0x1b.\n" 369 " Modifiers can be added to define how the event should be\n" 370 " monitored. Possible modifiers are:\n" 371 " u - monitor user space events only\n" 372 " k - monitor kernel space events only\n" 373 "--group event1[:modifier],event2[:modifier2],...\n" 374 " Similar to -e option. But events specified in the same --group\n" 375 " option are monitored as a group, and scheduled in and out at the\n" 376 " same time.\n" 377 "--no-inherit Don't stat created child threads/processes.\n" 378 "-o output_filename Write report to output_filename instead of standard output.\n" 379 "--per-core Print counters for each cpu core.\n" 380 "--per-thread Print counters for each thread.\n" 381 "-p pid1,pid2,... Stat events on existing processes. Mutually exclusive with -a.\n" 382 "-t tid1,tid2,... Stat events on existing threads. Mutually exclusive with -a.\n" 383 #if defined(__ANDROID__) 384 "--use-devfreq-counters On devices with Qualcomm SOCs, some hardware counters may be used\n" 385 " to monitor memory latency (in drivers/devfreq/arm-memlat-mon.c),\n" 386 " making fewer counters available to users. This option asks devfreq\n" 387 " to temporarily release counters by replacing memory-latency governor\n" 388 " with performance governor. It affects memory latency during profiling,\n" 389 " and may cause wedged power if simpleperf is killed in between.\n" 390 #endif 391 "--verbose Show result in verbose mode.\n" 392 #if 0 393 // Below options are only used internally and shouldn't be visible to the public. 394 "--in-app We are already running in the app's context.\n" 395 "--tracepoint-events file_name Read tracepoint events from [file_name] instead of tracefs.\n" 396 "--out-fd <fd> Write output to a file descriptor.\n" 397 "--stop-signal-fd <fd> Stop stating when fd is readable.\n" 398 #endif 399 // clang-format on 400 ), 401 verbose_mode_(false), 402 system_wide_collection_(false), 403 child_inherit_(true), 404 duration_in_sec_(0), 405 interval_in_ms_(0), 406 interval_only_values_(false), 407 event_selection_set_(true), 408 csv_(false), 409 in_app_context_(false) { 410 // Die if parent exits. 411 prctl(PR_SET_PDEATHSIG, SIGHUP, 0, 0, 0); 412 } 413 414 bool Run(const std::vector<std::string>& args); 415 416 private: 417 bool ParseOptions(const std::vector<std::string>& args, 418 std::vector<std::string>* non_option_args); 419 bool AddDefaultMeasuredEventTypes(); 420 void SetEventSelectionFlags(); 421 void MonitorEachThread(); 422 void AdjustToIntervalOnlyValues(std::vector<CountersInfo>& counters); 423 bool ShowCounters(const std::vector<CountersInfo>& counters, 424 double duration_in_sec, FILE* fp); 425 426 bool verbose_mode_; 427 bool system_wide_collection_; 428 bool child_inherit_; 429 double duration_in_sec_; 430 double interval_in_ms_; 431 bool interval_only_values_; 432 std::vector<std::vector<CounterSum>> last_sum_values_; 433 std::vector<int> cpus_; 434 EventSelectionSet event_selection_set_; 435 std::string output_filename_; 436 android::base::unique_fd out_fd_; 437 bool csv_; 438 std::string app_package_name_; 439 bool in_app_context_; 440 android::base::unique_fd stop_signal_fd_; 441 bool use_devfreq_counters_ = false; 442 443 bool report_per_core_ = false; 444 bool report_per_thread_ = false; 445 // used to report event count for each thread 446 std::unordered_map<pid_t, ThreadInfo> thread_info_; 447 }; 448 449 bool StatCommand::Run(const std::vector<std::string>& args) { 450 if (!CheckPerfEventLimit()) { 451 return false; 452 } 453 AllowMoreOpenedFiles(); 454 455 // 1. Parse options, and use default measured event types if not given. 456 std::vector<std::string> workload_args; 457 if (!ParseOptions(args, &workload_args)) { 458 return false; 459 } 460 if (!app_package_name_.empty() && !in_app_context_) { 461 if (!IsRoot()) { 462 return RunInAppContext(app_package_name_, "stat", args, workload_args.size(), 463 output_filename_, !event_selection_set_.GetTracepointEvents().empty()); 464 } 465 } 466 DevfreqCounters devfreq_counters; 467 if (use_devfreq_counters_) { 468 if (!devfreq_counters.Use()) { 469 return false; 470 } 471 } 472 if (event_selection_set_.empty()) { 473 if (!AddDefaultMeasuredEventTypes()) { 474 return false; 475 } 476 } 477 SetEventSelectionFlags(); 478 479 // 2. Create workload. 480 std::unique_ptr<Workload> workload; 481 if (!workload_args.empty()) { 482 workload = Workload::CreateWorkload(workload_args); 483 if (workload == nullptr) { 484 return false; 485 } 486 } 487 bool need_to_check_targets = false; 488 if (system_wide_collection_) { 489 if (report_per_thread_) { 490 event_selection_set_.AddMonitoredProcesses(GetAllProcesses()); 491 } else { 492 event_selection_set_.AddMonitoredThreads({-1}); 493 } 494 } else if (!event_selection_set_.HasMonitoredTarget()) { 495 if (workload != nullptr) { 496 event_selection_set_.AddMonitoredProcesses({workload->GetPid()}); 497 event_selection_set_.SetEnableOnExec(true); 498 } else if (!app_package_name_.empty()) { 499 std::set<pid_t> pids = WaitForAppProcesses(app_package_name_); 500 event_selection_set_.AddMonitoredProcesses(pids); 501 } else { 502 LOG(ERROR) 503 << "No threads to monitor. Try `simpleperf help stat` for help\n"; 504 return false; 505 } 506 } else { 507 need_to_check_targets = true; 508 } 509 510 if (report_per_thread_) { 511 MonitorEachThread(); 512 } 513 514 // 3. Open perf_event_files and output file if defined. 515 if (cpus_.empty() && !report_per_core_ && (report_per_thread_ || !system_wide_collection_)) { 516 cpus_.push_back(-1); // Get event count for each thread on all cpus. 517 } 518 if (!event_selection_set_.OpenEventFiles(cpus_)) { 519 return false; 520 } 521 std::unique_ptr<FILE, decltype(&fclose)> fp_holder(nullptr, fclose); 522 if (!output_filename_.empty()) { 523 fp_holder.reset(fopen(output_filename_.c_str(), "we")); 524 if (fp_holder == nullptr) { 525 PLOG(ERROR) << "failed to open " << output_filename_; 526 return false; 527 } 528 } else if (out_fd_ != -1) { 529 fp_holder.reset(fdopen(out_fd_.release(), "we")); 530 if (fp_holder == nullptr) { 531 PLOG(ERROR) << "failed to write output."; 532 return false; 533 } 534 } 535 FILE* fp = fp_holder ? fp_holder.get() : stdout; 536 537 // 4. Add signal/periodic Events. 538 IOEventLoop* loop = event_selection_set_.GetIOEventLoop(); 539 if (interval_in_ms_ != 0) { 540 if (!loop->UsePreciseTimer()) { 541 return false; 542 } 543 } 544 std::chrono::time_point<std::chrono::steady_clock> start_time; 545 std::vector<CountersInfo> counters; 546 if (need_to_check_targets && !event_selection_set_.StopWhenNoMoreTargets()) { 547 return false; 548 } 549 auto exit_loop_callback = [loop]() { 550 return loop->ExitLoop(); 551 }; 552 if (!loop->AddSignalEvents({SIGCHLD, SIGINT, SIGTERM, SIGHUP}, exit_loop_callback)) { 553 return false; 554 } 555 if (stop_signal_fd_ != -1) { 556 if (!loop->AddReadEvent(stop_signal_fd_, exit_loop_callback)) { 557 return false; 558 } 559 } 560 if (duration_in_sec_ != 0) { 561 if (!loop->AddPeriodicEvent(SecondToTimeval(duration_in_sec_), exit_loop_callback)) { 562 return false; 563 } 564 } 565 auto print_counters = [&]() { 566 auto end_time = std::chrono::steady_clock::now(); 567 if (!event_selection_set_.ReadCounters(&counters)) { 568 return false; 569 } 570 double duration_in_sec = 571 std::chrono::duration_cast<std::chrono::duration<double>>(end_time - 572 start_time) 573 .count(); 574 if (interval_only_values_) { 575 AdjustToIntervalOnlyValues(counters); 576 } 577 if (!ShowCounters(counters, duration_in_sec, fp)) { 578 return false; 579 } 580 return true; 581 }; 582 583 if (interval_in_ms_ != 0) { 584 if (!loop->AddPeriodicEvent(SecondToTimeval(interval_in_ms_ / 1000.0), 585 print_counters)) { 586 return false; 587 } 588 } 589 590 // 5. Count events while workload running. 591 start_time = std::chrono::steady_clock::now(); 592 if (workload != nullptr && !workload->Start()) { 593 return false; 594 } 595 if (!loop->RunLoop()) { 596 return false; 597 } 598 599 // 6. Read and print counters. 600 if (interval_in_ms_ == 0) { 601 return print_counters(); 602 } 603 return true; 604 } 605 606 bool StatCommand::ParseOptions(const std::vector<std::string>& args, 607 std::vector<std::string>* non_option_args) { 608 std::set<pid_t> tid_set; 609 size_t i; 610 for (i = 0; i < args.size() && args[i].size() > 0 && args[i][0] == '-'; ++i) { 611 if (args[i] == "-a") { 612 system_wide_collection_ = true; 613 } else if (args[i] == "--app") { 614 if (!NextArgumentOrError(args, &i)) { 615 return false; 616 } 617 app_package_name_ = args[i]; 618 } else if (args[i] == "--cpu") { 619 if (!NextArgumentOrError(args, &i)) { 620 return false; 621 } 622 cpus_ = GetCpusFromString(args[i]); 623 } else if (args[i] == "--csv") { 624 csv_ = true; 625 } else if (args[i] == "--duration") { 626 if (!GetDoubleOption(args, &i, &duration_in_sec_, 1e-9)) { 627 return false; 628 } 629 } else if (args[i] == "--interval") { 630 if (!GetDoubleOption(args, &i, &interval_in_ms_, 1e-9)) { 631 return false; 632 } 633 } else if (args[i] == "--interval-only-values") { 634 interval_only_values_ = true; 635 } else if (args[i] == "-e") { 636 if (!NextArgumentOrError(args, &i)) { 637 return false; 638 } 639 std::vector<std::string> event_types = android::base::Split(args[i], ","); 640 for (auto& event_type : event_types) { 641 if (!event_selection_set_.AddEventType(event_type)) { 642 return false; 643 } 644 } 645 } else if (args[i] == "--group") { 646 if (!NextArgumentOrError(args, &i)) { 647 return false; 648 } 649 std::vector<std::string> event_types = android::base::Split(args[i], ","); 650 if (!event_selection_set_.AddEventGroup(event_types)) { 651 return false; 652 } 653 } else if (args[i] == "--in-app") { 654 in_app_context_ = true; 655 } else if (args[i] == "--no-inherit") { 656 child_inherit_ = false; 657 } else if (args[i] == "-o") { 658 if (!NextArgumentOrError(args, &i)) { 659 return false; 660 } 661 output_filename_ = args[i]; 662 } else if (args[i] == "--out-fd") { 663 int fd; 664 if (!GetUintOption(args, &i, &fd)) { 665 return false; 666 } 667 out_fd_.reset(fd); 668 } else if (args[i] == "--per-core") { 669 report_per_core_ = true; 670 } else if (args[i] == "--per-thread") { 671 report_per_thread_ = true; 672 } else if (args[i] == "-p") { 673 if (!NextArgumentOrError(args, &i)) { 674 return false; 675 } 676 std::set<pid_t> pids; 677 if (!GetValidThreadsFromThreadString(args[i], &pids)) { 678 return false; 679 } 680 event_selection_set_.AddMonitoredProcesses(pids); 681 } else if (args[i] == "--stop-signal-fd") { 682 int fd; 683 if (!GetUintOption(args, &i, &fd)) { 684 return false; 685 } 686 stop_signal_fd_.reset(fd); 687 } else if (args[i] == "-t") { 688 if (!NextArgumentOrError(args, &i)) { 689 return false; 690 } 691 std::set<pid_t> tids; 692 if (!GetValidThreadsFromThreadString(args[i], &tids)) { 693 return false; 694 } 695 event_selection_set_.AddMonitoredThreads(tids); 696 } else if (args[i] == "--tracepoint-events") { 697 if (!NextArgumentOrError(args, &i)) { 698 return false; 699 } 700 if (!SetTracepointEventsFilePath(args[i])) { 701 return false; 702 } 703 #if defined(__ANDROID__) 704 } else if (args[i] == "--use-devfreq-counters") { 705 use_devfreq_counters_ = true; 706 #endif 707 } else if (args[i] == "--verbose") { 708 verbose_mode_ = true; 709 } else { 710 ReportUnknownOption(args, i); 711 return false; 712 } 713 } 714 715 if (system_wide_collection_ && event_selection_set_.HasMonitoredTarget()) { 716 LOG(ERROR) << "Stat system wide and existing processes/threads can't be " 717 "used at the same time."; 718 return false; 719 } 720 if (system_wide_collection_ && !IsRoot()) { 721 LOG(ERROR) << "System wide profiling needs root privilege."; 722 return false; 723 } 724 725 non_option_args->clear(); 726 for (; i < args.size(); ++i) { 727 non_option_args->push_back(args[i]); 728 } 729 return true; 730 } 731 732 bool StatCommand::AddDefaultMeasuredEventTypes() { 733 for (auto& name : default_measured_event_types) { 734 // It is not an error when some event types in the default list are not 735 // supported by the kernel. 736 const EventType* type = FindEventTypeByName(name); 737 if (type != nullptr && 738 IsEventAttrSupported(CreateDefaultPerfEventAttr(*type), name)) { 739 if (!event_selection_set_.AddEventType(name)) { 740 return false; 741 } 742 } 743 } 744 if (event_selection_set_.empty()) { 745 LOG(ERROR) << "Failed to add any supported default measured types"; 746 return false; 747 } 748 return true; 749 } 750 751 void StatCommand::SetEventSelectionFlags() { 752 event_selection_set_.SetInherit(child_inherit_); 753 } 754 755 void StatCommand::MonitorEachThread() { 756 std::vector<pid_t> threads; 757 for (auto pid : event_selection_set_.GetMonitoredProcesses()) { 758 for (auto tid : GetThreadsInProcess(pid)) { 759 ThreadInfo info; 760 if (GetThreadName(tid, &info.name)) { 761 info.tid = tid; 762 info.pid = pid; 763 thread_info_[tid] = std::move(info); 764 threads.push_back(tid); 765 } 766 } 767 } 768 for (auto tid : event_selection_set_.GetMonitoredThreads()) { 769 ThreadInfo info; 770 if (ReadThreadNameAndPid(tid, &info.name, &info.pid)) { 771 info.tid = tid; 772 thread_info_[tid] = std::move(info); 773 threads.push_back(tid); 774 } 775 } 776 event_selection_set_.ClearMonitoredTargets(); 777 event_selection_set_.AddMonitoredThreads(threads); 778 } 779 780 void StatCommand::AdjustToIntervalOnlyValues(std::vector<CountersInfo>& counters) { 781 if (last_sum_values_.size() < counters.size()) { 782 last_sum_values_.resize(counters.size()); 783 } 784 for (size_t i = 0; i < counters.size(); i++) { 785 std::vector<CounterInfo>& counters_per_event = counters[i].counters; 786 std::vector<CounterSum>& last_sum = last_sum_values_[i]; 787 788 if (last_sum.size() < counters_per_event.size()) { 789 last_sum.resize(counters_per_event.size()); 790 } 791 for (size_t j = 0; j < counters_per_event.size(); j++) { 792 PerfCounter& counter = counters_per_event[j].counter; 793 CounterSum new_sum; 794 new_sum.FromCounter(counter); 795 CounterSum delta = new_sum - last_sum[j]; 796 delta.ToCounter(counter); 797 last_sum[j] = new_sum; 798 } 799 } 800 } 801 802 bool StatCommand::ShowCounters(const std::vector<CountersInfo>& counters, 803 double duration_in_sec, FILE* fp) { 804 if (csv_) { 805 fprintf(fp, "Performance counter statistics,\n"); 806 } else { 807 fprintf(fp, "Performance counter statistics:\n\n"); 808 } 809 810 if (verbose_mode_) { 811 for (auto& counters_info : counters) { 812 for (auto& counter_info : counters_info.counters) { 813 if (csv_) { 814 fprintf(fp, "%s,tid,%d,cpu,%d,count,%" PRIu64 ",time_enabled,%" PRIu64 815 ",time running,%" PRIu64 ",id,%" PRIu64 ",\n", 816 counters_info.event_name.c_str(), counter_info.tid, 817 counter_info.cpu, counter_info.counter.value, 818 counter_info.counter.time_enabled, 819 counter_info.counter.time_running, counter_info.counter.id); 820 } else { 821 fprintf(fp, 822 "%s(tid %d, cpu %d): count %" PRIu64 ", time_enabled %" PRIu64 823 ", time running %" PRIu64 ", id %" PRIu64 "\n", 824 counters_info.event_name.c_str(), counter_info.tid, 825 counter_info.cpu, counter_info.counter.value, 826 counter_info.counter.time_enabled, 827 counter_info.counter.time_running, counter_info.counter.id); 828 } 829 } 830 } 831 } 832 833 CounterSummaryBuilder builder(report_per_thread_, report_per_core_, csv_, thread_info_); 834 for (const auto& info : counters) { 835 builder.AddCountersForOneEventType(info); 836 } 837 CounterSummaries summaries(builder.Build(), csv_); 838 summaries.AutoGenerateSummaries(); 839 summaries.GenerateComments(duration_in_sec); 840 summaries.Show(fp); 841 842 if (csv_) 843 fprintf(fp, "Total test time,%lf,seconds,\n", duration_in_sec); 844 else 845 fprintf(fp, "\nTotal test time: %lf seconds.\n", duration_in_sec); 846 847 const char* COUNTER_MULTIPLEX_INFO = 848 "probably caused by hardware counter multiplexing (less counters than events).\n" 849 "Try --use-devfreq-counters if on a rooted device."; 850 851 if (cpus_ == std::vector<int>(1, -1) || 852 event_selection_set_.GetMonitoredThreads() == std::set<pid_t>({-1})) { 853 // We either monitor a thread on all cpus, or monitor all threads on a cpu. In both cases, 854 // if percentages < 100%, probably it is caused by hardware counter multiplexing. 855 bool counters_always_available = true; 856 for (const auto& summary : summaries.Summaries()) { 857 if (!summary.IsMonitoredAllTheTime()) { 858 counters_always_available = false; 859 break; 860 } 861 } 862 if (!counters_always_available) { 863 LOG(WARNING) << "Percentages < 100% means some events only run a subset of enabled time,\n" 864 << COUNTER_MULTIPLEX_INFO; 865 } 866 } else if (report_per_thread_) { 867 // We monitor each thread on each cpu. 868 LOG(INFO) << "A percentage represents runtime_on_a_cpu / runtime_on_all_cpus for each thread.\n" 869 << "If percentage sum of a thread < 99%, or report for a running thread is missing,\n" 870 << COUNTER_MULTIPLEX_INFO; 871 } else { 872 // We monitor some threads on each cpu. 873 LOG(INFO) << "A percentage represents runtime_on_a_cpu / runtime_on_all_cpus for monitored\n" 874 << "threads. If percentage sum < 99%, or report for an event is missing,\n" 875 << COUNTER_MULTIPLEX_INFO; 876 } 877 return true; 878 } 879 880 } // namespace 881 882 void RegisterStatCommand() { 883 RegisterCommand("stat", 884 [] { return std::unique_ptr<Command>(new StatCommand); }); 885 } 886