// Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "perf_parser.h"

#include <algorithm>
#include <cstdio>
#include <set>

#include "base/logging.h"

#include "address_mapper.h"
#include "quipper_string.h"
#include "perf_utils.h"

namespace quipper {

namespace {

struct EventAndTime {
  ParsedEvent* event;
  uint64_t time;
};

// Returns true if |e1| has an earlier timestamp than |e2|. The args are const
// references to unique_ptrs because the events are stored in a vector of
// unique_ptrs, which is what std::stable_sort operates on.
bool CompareParsedEventTimes(const std::unique_ptr<EventAndTime>& e1,
                             const std::unique_ptr<EventAndTime>& e2) {
  return (e1->time < e2->time);
}

// Kernel MMAP events are reported with a pid of -1, which appears as
// UINT32_MAX when read as an unsigned 32-bit value.
const uint32_t kKernelPid = UINT32_MAX;

// Name and ID of the kernel swapper process.
const char kSwapperCommandName[] = "swapper";
const uint32_t kSwapperPid = 0;

bool IsNullBranchStackEntry(const struct branch_entry& entry) {
  return (!entry.from && !entry.to);
}

}  // namespace

PerfParser::PerfParser() : kernel_mapper_(new AddressMapper) {}

PerfParser::~PerfParser() {}

PerfParser::PerfParser(const PerfParser::Options& options) {
  options_ = options;
}

void PerfParser::set_options(const PerfParser::Options& options) {
  options_ = options;
}

bool PerfParser::ParseRawEvents() {
  process_mappers_.clear();
  parsed_events_.resize(events_.size());
  for (size_t i = 0; i < events_.size(); ++i) {
    ParsedEvent& parsed_event = parsed_events_[i];
    parsed_event.raw_event = events_[i].get();
  }
  MaybeSortParsedEvents();
  if (!ProcessEvents()) {
    return false;
  }

  if (!options_.discard_unused_events)
    return true;

  // Some MMAP/MMAP2 events' mapped regions will not have any samples. These
  // MMAP/MMAP2 events should be dropped. |parsed_events_| should be
  // reconstructed without these events.
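  // Compact |parsed_events_| in place: |read_index| scans every event while
  // |write_index| marks where the next retained event is stored (the classic
  // erase-remove pattern).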
  size_t write_index = 0;
  size_t read_index;
  for (read_index = 0; read_index < parsed_events_.size(); ++read_index) {
    const ParsedEvent& event = parsed_events_[read_index];
    if ((event.raw_event->header.type == PERF_RECORD_MMAP ||
         event.raw_event->header.type == PERF_RECORD_MMAP2) &&
        event.num_samples_in_mmap_region == 0) {
      continue;
    }
    if (read_index != write_index)
      parsed_events_[write_index] = event;
    ++write_index;
  }
  CHECK_LE(write_index, parsed_events_.size());
  parsed_events_.resize(write_index);

  // Now regenerate the sorted event list. It holds pointers into
  // |parsed_events_|, which are invalidated by the resize() above.
  MaybeSortParsedEvents();

  return true;
}

void PerfParser::MaybeSortParsedEvents() {
  if (!(sample_type_ & PERF_SAMPLE_TIME)) {
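    // Without PERF_SAMPLE_TIME there are no timestamps to sort by, so the
    // "sorted" list simply mirrors the original event order.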
    parsed_events_sorted_by_time_.resize(parsed_events_.size());
    for (size_t i = 0; i < parsed_events_.size(); ++i) {
      parsed_events_sorted_by_time_[i] = &parsed_events_[i];
    }
    return;
  }
  std::vector<std::unique_ptr<EventAndTime>> events_and_times;
  events_and_times.resize(parsed_events_.size());
  for (size_t i = 0; i < parsed_events_.size(); ++i) {
    std::unique_ptr<EventAndTime> event_and_time(new EventAndTime);

    // Store the timestamp and event pointer in an array.
    event_and_time->event = &parsed_events_[i];

    struct perf_sample sample_info;
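    // RAII guard that releases any per-sample memory allocated by
    // ReadPerfSampleInfo(), e.g. callchain or branch stack data.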
    PerfSampleCustodian custodian(sample_info);
    CHECK(ReadPerfSampleInfo(*parsed_events_[i].raw_event, &sample_info));
    event_and_time->time = sample_info.time;

    events_and_times[i] = std::move(event_and_time);
  }
  // Sort the events based on timestamp, and then populate the sorted event
  // vector in sorted order.
  std::stable_sort(events_and_times.begin(), events_and_times.end(),
                   CompareParsedEventTimes);

  parsed_events_sorted_by_time_.resize(events_and_times.size());
  for (unsigned int i = 0; i < events_and_times.size(); ++i) {
    parsed_events_sorted_by_time_[i] = events_and_times[i]->event;
  }
}

bool PerfParser::ProcessEvents() {
  memset(&stats_, 0, sizeof(stats_));

  stats_.did_remap = false;  // Explicitly clear the remap flag.

  // Pid 0 is called the swapper process. Even though perf does not record a
  // COMM event for pid 0, we act as though we had received one. Perf itself
  // does the same; see, for example:
  //   http://lxr.free-electrons.com/source/tools/perf/util/session.c#L1120
  commands_.insert(kSwapperCommandName);
  pidtid_to_comm_map_[std::make_pair(kSwapperPid, kSwapperPid)] =
      &(*commands_.find(kSwapperCommandName));
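  // Note: |commands_| is a set, so each stored command string has a stable
  // address; |pidtid_to_comm_map_| stores pointers into it rather than copies.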

  // NB: Not necessarily actually sorted by time.
  for (unsigned int i = 0; i < parsed_events_sorted_by_time_.size(); ++i) {
    ParsedEvent& parsed_event = *parsed_events_sorted_by_time_[i];
    event_t& event = *parsed_event.raw_event;
    switch (event.header.type) {
      case PERF_RECORD_SAMPLE:
        // SAMPLE doesn't have any fields to log at a fixed,
        // previously-endian-swapped location. This used to log ip.
        VLOG(1) << "SAMPLE";
        ++stats_.num_sample_events;

        if (MapSampleEvent(&parsed_event)) {
          ++stats_.num_sample_events_mapped;
        }
        break;
      case PERF_RECORD_MMAP: {
        VLOG(1) << "MMAP: " << event.mmap.filename;
        ++stats_.num_mmap_events;
        // Use the array index of the current mmap event as a unique identifier.
        CHECK(MapMmapEvent(&event.mmap, i)) << "Unable to map MMAP event!";
        // No samples in this MMAP region yet, hopefully.
        parsed_event.num_samples_in_mmap_region = 0;
        DSOInfo dso_info;
        // TODO(sque): Add Build ID as well.
        dso_info.name = event.mmap.filename;
        dso_set_.insert(dso_info);
        break;
      }
      case PERF_RECORD_MMAP2: {
        VLOG(1) << "MMAP2: " << event.mmap2.filename;
        ++stats_.num_mmap_events;
        // Use the array index of the current mmap event as a unique identifier.
        CHECK(MapMmapEvent(&event.mmap2, i)) << "Unable to map MMAP2 event!";
        // No samples in this MMAP region yet, hopefully.
        parsed_event.num_samples_in_mmap_region = 0;
        DSOInfo dso_info;
        // TODO(sque): Add Build ID as well.
        dso_info.name = event.mmap2.filename;
        dso_set_.insert(dso_info);
        break;
      }
      case PERF_RECORD_FORK:
        VLOG(1) << "FORK: " << event.fork.ppid << ":" << event.fork.ptid
                << " -> " << event.fork.pid << ":" << event.fork.tid;
        ++stats_.num_fork_events;
        CHECK(MapForkEvent(event.fork)) << "Unable to map FORK event!";
        break;
      case PERF_RECORD_EXIT:
        // EXIT events have the same structure as FORK events.
        VLOG(1) << "EXIT: " << event.fork.ppid << ":" << event.fork.ptid;
        ++stats_.num_exit_events;
        break;
      case PERF_RECORD_COMM:
        VLOG(1) << "COMM: " << event.comm.pid << ":" << event.comm.tid << ": "
                << event.comm.comm;
        ++stats_.num_comm_events;
        CHECK(MapCommEvent(event.comm));
        commands_.insert(event.comm.comm);
        pidtid_to_comm_map_[std::make_pair(event.comm.pid, event.comm.tid)] =
            &(*commands_.find(event.comm.comm));
        break;
      case PERF_RECORD_LOST:
      case PERF_RECORD_THROTTLE:
      case PERF_RECORD_UNTHROTTLE:
      case PERF_RECORD_READ:
      case PERF_RECORD_MAX:
        VLOG(1) << "Parsed event type: " << event.header.type
                << ". Doing nothing.";
        break;
      case SIMPLE_PERF_RECORD_KERNEL_SYMBOL:
      case SIMPLE_PERF_RECORD_DSO:
      case SIMPLE_PERF_RECORD_SYMBOL:
      case SIMPLE_PERF_RECORD_SPLIT:
      case SIMPLE_PERF_RECORD_SPLIT_END:
        break;
      default:
        LOG(ERROR) << "Unknown event type: " << event.header.type;
        return false;
    }
  }

  // Print stats collected from parsing.
  DLOG(INFO) << "Parser processed: "
             << stats_.num_mmap_events << " MMAP/MMAP2 events, "
             << stats_.num_comm_events << " COMM events, "
             << stats_.num_fork_events << " FORK events, "
             << stats_.num_exit_events << " EXIT events, "
             << stats_.num_sample_events << " SAMPLE events, "
             << stats_.num_sample_events_mapped << " of these were mapped";

  float sample_mapping_percentage =
      static_cast<float>(stats_.num_sample_events_mapped) /
      stats_.num_sample_events * 100.;
  float threshold = options_.sample_mapping_percentage_threshold;
  if (sample_mapping_percentage < threshold) {
    LOG(WARNING) << "Mapped " << static_cast<int>(sample_mapping_percentage)
                 << "% of samples, expected at least "
                 << static_cast<int>(threshold) << "%";
    return false;
  }
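  // Record whether remapping was performed so consumers of |stats_| know the
  // addresses are synthetic.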
  stats_.did_remap = options_.do_remap;
  return true;
}

bool PerfParser::MapSampleEvent(ParsedEvent* parsed_event) {
  bool mapping_failed = false;

  // Both the sample IP and the pid/tid are needed to map a sample.
  if (!(sample_type_ & PERF_SAMPLE_IP && sample_type_ & PERF_SAMPLE_TID))
    return false;
  perf_sample sample_info;
  PerfSampleCustodian custodian(sample_info);
  if (!ReadPerfSampleInfo(*parsed_event->raw_event, &sample_info))
    return false;
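
  // Find the command associated with this sample's pid/tid, if a COMM event
  // has been seen for it.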
  PidTid pidtid = std::make_pair(sample_info.pid, sample_info.tid);
  const auto comm_iter = pidtid_to_comm_map_.find(pidtid);
  if (comm_iter != pidtid_to_comm_map_.end()) {
    parsed_event->set_command(comm_iter->second);
  }

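  // Save the pre-mapping IP: MapCallchain() uses it to recognize the
  // already-mapped sample address among the callchain entries.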
  const uint64_t unmapped_event_ip = sample_info.ip;

  // Map the event IP itself.
  if (!MapIPAndPidAndGetNameAndOffset(sample_info.ip,
                                      sample_info.pid,
                                      &sample_info.ip,
                                      &parsed_event->dso_and_offset)) {
    mapping_failed = true;
  }

  if (sample_info.callchain &&
      !MapCallchain(sample_info.ip,
                    sample_info.pid,
                    unmapped_event_ip,
                    sample_info.callchain,
                    parsed_event)) {
    mapping_failed = true;
  }

  if (sample_info.branch_stack &&
      !MapBranchStack(sample_info.pid,
                      sample_info.branch_stack,
                      parsed_event)) {
    mapping_failed = true;
  }

  if (options_.do_remap) {
    // Write the remapped data back to the raw event, regardless of whether
    // every address was successfully remapped. A single failed remap should
    // not invalidate all the other remapped entries.
    if (!WritePerfSampleInfo(sample_info, parsed_event->raw_event)) {
      LOG(ERROR) << "Failed to write back remapped sample info.";
      return false;
    }
  }

  return !mapping_failed;
}

bool PerfParser::MapCallchain(const uint64_t ip,
                              const uint32_t pid,
                              const uint64_t original_event_addr,
                              struct ip_callchain* callchain,
                              ParsedEvent* parsed_event) {
  if (!callchain) {
    LOG(ERROR) << "NULL call stack data.";
    return false;
  }

  bool mapping_failed = false;

  // If the callchain's length is 0, there is no work to do.
  if (callchain->nr == 0)
    return true;

  // Allocate room for the maximum possible number of entries; any unmapped
  // entries are trimmed off at the end.
  parsed_event->callchain.resize(callchain->nr);
  int num_entries_mapped = 0;
  for (unsigned int j = 0; j < callchain->nr; ++j) {
    uint64_t entry = callchain->ips[j];
    // Entries at or above PERF_CONTEXT_MAX are context markers (e.g. kernel
    // vs. user), not instruction addresses; do not attempt to map them.
    if (entry >= PERF_CONTEXT_MAX) {
      continue;
    }
    // The sample address has already been mapped, so no need to map it.
    if (entry == original_event_addr) {
      callchain->ips[j] = ip;
      continue;
    }
    if (!MapIPAndPidAndGetNameAndOffset(
            entry,
            pid,
            &callchain->ips[j],
            &parsed_event->callchain[num_entries_mapped++])) {
      mapping_failed = true;
    }
  }
  // Some entries (e.g. context markers) may not have been mapped. Trim
  // |parsed_event->callchain| to remove the unused entries at the end.
  parsed_event->callchain.resize(num_entries_mapped);

  return !mapping_failed;
}

bool PerfParser::MapBranchStack(const uint32_t pid,
                                struct branch_stack* branch_stack,
                                ParsedEvent* parsed_event) {
  if (!branch_stack) {
    LOG(ERROR) << "NULL branch stack data.";
    return false;
  }

  // First, trim the branch stack to remove trailing null entries.
  size_t trimmed_size = 0;
  for (size_t i = 0; i < branch_stack->nr; ++i) {
    // Count the number of non-null entries before the first null entry.
    if (IsNullBranchStackEntry(branch_stack->entries[i])) {
      break;
    }
    ++trimmed_size;
  }

  // If a null entry was found, verify that all subsequent entries are also
  // null.
  for (size_t i = trimmed_size; i < branch_stack->nr; ++i) {
    const struct branch_entry& entry = branch_stack->entries[i];
    if (!IsNullBranchStackEntry(entry)) {
      LOG(ERROR) << "Non-null branch stack entry found after null entry: "
                 << reinterpret_cast<void*>(entry.from) << " -> "
                 << reinterpret_cast<void*>(entry.to);
      return false;
    }
  }

  // Map branch stack addresses.
  parsed_event->branch_stack.resize(trimmed_size);
  for (unsigned int i = 0; i < trimmed_size; ++i) {
    struct branch_entry& entry = branch_stack->entries[i];
    ParsedEvent::BranchEntry& parsed_entry = parsed_event->branch_stack[i];
    if (!MapIPAndPidAndGetNameAndOffset(entry.from,
                                        pid,
                                        &entry.from,
                                        &parsed_entry.from)) {
      return false;
    }
    if (!MapIPAndPidAndGetNameAndOffset(entry.to,
                                        pid,
                                        &entry.to,
                                        &parsed_entry.to)) {
      return false;
    }
    parsed_entry.predicted = entry.flags.predicted;
    // A branch is either predicted or mispredicted, never both. Don't CHECK
    // here; just exit gracefully, since this is a minor issue.
    if (entry.flags.predicted == entry.flags.mispred) {
      LOG(ERROR) << "Branch stack entry predicted and mispred flags "
                 << "both have value " << entry.flags.mispred;
      return false;
    }
  }

  return true;
}

bool PerfParser::MapIPAndPidAndGetNameAndOffset(
    uint64_t ip,
    uint32_t pid,
    uint64_t* new_ip,
    ParsedEvent::DSOAndOffset* dso_and_offset) {

  // Attempt to find the synthetic address of the IP sample in this order:
  // 1. Address space of its own process.
  // 2. Address space of the kernel.

  uint64_t mapped_addr = 0;

  // A SAMPLE event may arrive before any event that creates an address mapper
  // for its process (e.g. for pid 0), so create the mapper on demand.
  AddressMapper* mapper = GetOrCreateProcessMapper(pid).first;
  bool mapped = mapper->GetMappedAddress(ip, &mapped_addr);
  if (!mapped) {
    mapper = kernel_mapper_.get();
    mapped = mapper->GetMappedAddress(ip, &mapped_addr);
  }

  // TODO(asharif): What should we do when we cannot map a SAMPLE event?
  if (mapped) {
    if (dso_and_offset) {
      uint64_t id = kuint64max;
      CHECK(mapper->GetMappedIDAndOffset(ip, &id, &dso_and_offset->offset_));
      // Make sure the ID points to a valid event.
      CHECK_LT(id, parsed_events_sorted_by_time_.size());
      ParsedEvent* parsed_event = parsed_events_sorted_by_time_[id];
      const event_t* raw_event = parsed_event->raw_event;

      DSOInfo dso_info;
      if (raw_event->header.type == PERF_RECORD_MMAP) {
        dso_info.name = raw_event->mmap.filename;
      } else if (raw_event->header.type == PERF_RECORD_MMAP2) {
        dso_info.name = raw_event->mmap2.filename;
      } else {
        LOG(FATAL) << "Expected MMAP or MMAP2 event";
      }

      // Find the mmap DSO filename in the set of known DSO names.
      // TODO(sque): take build IDs into account.
      std::set<DSOInfo>::const_iterator dso_iter = dso_set_.find(dso_info);
      CHECK(dso_iter != dso_set_.end());
      dso_and_offset->dso_info_ = &(*dso_iter);

      ++parsed_event->num_samples_in_mmap_region;
    }
    if (options_.do_remap)
      *new_ip = mapped_addr;
  }
  return mapped;
}

bool PerfParser::MapMmapEvent(uint64_t id,
                              uint32_t pid,
                              uint64_t* p_start,
                              uint64_t* p_len,
                              uint64_t* p_pgoff) {
  // We need to hide only the real kernel addresses. However, to make things
  // more secure, and to make the mapping idempotent, we should remap all
  // addresses, both kernel and non-kernel.
  AddressMapper* mapper =
      (pid == kKernelPid ? kernel_mapper_.get() :
       GetOrCreateProcessMapper(pid).first);

  uint64_t start = *p_start;
  uint64_t len = *p_len;
  uint64_t pgoff = *p_pgoff;

  // |id| == 0 corresponds to the kernel mmap. We have several cases here:
  //
  // For ARM and x86, in sudo mode, pgoff == start, example:
  // start=0x80008200
  // pgoff=0x80008200
  // len  =0xfffffff7ff7dff
  //
  // For x86-64, in sudo mode, pgoff is between start and start + len. SAMPLE
  // events lie between pgoff and pgoff + length of the real kernel binary,
  // example:
  // start=0x3bc00000
  // pgoff=0xffffffffbcc00198
  // len  =0xffffffff843fffff
  // SAMPLE events will be found after pgoff. For kernels with ASLR, pgoff will
  // be something only visible to the root user, and will be randomized at
  // startup. With |remap| set to true, we should hide pgoff in this case. So we
  // normalize all SAMPLE events relative to pgoff.
  //
  // For non-sudo mode, the kernel will be mapped from 0 to the pointer limit,
  // example:
  // start=0x0
  // pgoff=0x0
  // len  =0xffffffff
  if (id == 0) {
    // If pgoff is between start and start + len, normalize the event by
    // setting start to be pgoff, just as it is for ARM and x86. Also set len
    // to a much smaller number (closer to the real length of the kernel
    // binary), because SAMPLEs are only seen between |event->pgoff| and
    // |event->pgoff + kernel text size|.
    if (pgoff > start && pgoff < start + len) {
      len = len + start - pgoff;
      start = pgoff;
    }
    // For kernels with ASLR, pgoff is critical information that should not be
    // revealed when |remap| is true.
    pgoff = 0;
  }

  if (!mapper->MapWithID(start, len, id, pgoff, true)) {
    mapper->DumpToLog();
    return false;
  }

  if (options_.do_remap) {
    uint64_t mapped_addr;
    CHECK(mapper->GetMappedAddress(start, &mapped_addr));
    *p_start = mapped_addr;
    *p_len = len;
    *p_pgoff = pgoff;
  }
  return true;
}

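// Returns the address mapper for process |pid|, creating one if none exists
// yet. If a new mapper is created and |ppid| names a process with an existing
// mapper, the parent's mappings are copied into the new mapper. The second
// element of the returned pair is true iff a new mapper was created, e.g.
// (hypothetical pid):
//   AddressMapper* mapper = GetOrCreateProcessMapper(123).first;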
std::pair<AddressMapper*, bool> PerfParser::GetOrCreateProcessMapper(
    uint32_t pid, uint32_t* ppid) {
  const auto& search = process_mappers_.find(pid);
  if (search != process_mappers_.end()) {
    return std::make_pair(search->second.get(), false);
  }

  std::unique_ptr<AddressMapper> mapper;
  const auto& parent_mapper =
      (ppid ? process_mappers_.find(*ppid) : process_mappers_.end());
  if (parent_mapper != process_mappers_.end())
    mapper.reset(new AddressMapper(*parent_mapper->second));
  else
    mapper.reset(new AddressMapper());

  const auto inserted =
      process_mappers_.insert(search, std::make_pair(pid, std::move(mapper)));
  return std::make_pair(inserted->second.get(), true);
}

bool PerfParser::MapCommEvent(const struct comm_event& event) {
  GetOrCreateProcessMapper(event.pid);
  return true;
}

bool PerfParser::MapForkEvent(const struct fork_event& event) {
  PidTid parent = std::make_pair(event.ppid, event.ptid);
  PidTid child = std::make_pair(event.pid, event.tid);
  if (parent != child &&
      pidtid_to_comm_map_.find(parent) != pidtid_to_comm_map_.end()) {
    pidtid_to_comm_map_[child] = pidtid_to_comm_map_[parent];
  }

  const uint32_t pid = event.pid;

  // If the parent and child pids are the same, this is just a new thread
  // within the same process, so don't do anything.
  if (event.ppid == pid)
    return true;

  uint32_t ppid = event.ppid;
  if (!GetOrCreateProcessMapper(pid, &ppid).second) {
    DLOG(INFO) << "Found an existing process mapper with pid: " << pid;
  }

  return true;
}

}  // namespace quipper