1 // Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "perf_reader.h"
6
7 #include <byteswap.h>
8 #include <limits.h>
9
10 #include <bitset>
11 #include <cstdio>
12 #include <cstdlib>
13 #include <cstring>
14 #include <vector>
15
16 #define LOG_TAG "perf_reader"
17
18 #include "base/logging.h"
19
20 #include "quipper_string.h"
21 #include "perf_utils.h"
22
23 namespace quipper {
24
// A writable buffer described by where it starts and how long it is.
struct BufferWithSize {
  char* ptr;    // Start of the buffer.
  size_t size;  // Length of the buffer.
};
29
// Read-only counterpart of BufferWithSize. Declaring a BufferWithSize as const
// would only freeze the pointer member itself; the bytes it points at would
// still be writable. A separate struct holding a pointer-to-const is needed to
// protect the contents.
struct ConstBufferWithSize {
  const char* ptr;  // First byte of the read-only data.
  size_t size;      // Number of bytes available at |ptr|.
};
37
38 namespace {
39
40 // The type of the number of string data, found in the command line metadata in
41 // the perf data file.
42 typedef u32 num_string_data_type;
43
44 // Types of the event desc fields that are not found in other structs.
45 typedef u32 event_desc_num_events;
46 typedef u32 event_desc_attr_size;
47 typedef u32 event_desc_num_unique_ids;
48
49 // The type of the number of nodes field in NUMA topology.
50 typedef u32 numa_topology_num_nodes_type;
51
52 // A mask that is applied to metadata_mask_ in order to get a mask for
53 // only the metadata supported by quipper.
54 const uint32_t kSupportedMetadataMask =
55 1 << HEADER_TRACING_DATA |
56 1 << HEADER_BUILD_ID |
57 1 << HEADER_HOSTNAME |
58 1 << HEADER_OSRELEASE |
59 1 << HEADER_VERSION |
60 1 << HEADER_ARCH |
61 1 << HEADER_NRCPUS |
62 1 << HEADER_CPUDESC |
63 1 << HEADER_CPUID |
64 1 << HEADER_TOTAL_MEM |
65 1 << HEADER_CMDLINE |
66 1 << HEADER_EVENT_DESC |
67 1 << HEADER_CPU_TOPOLOGY |
68 1 << HEADER_NUMA_TOPOLOGY |
69 1 << HEADER_BRANCH_STACK;
70
// Build ID events carry PID = -1 by default; stored here as the equivalent
// all-ones unsigned 32-bit value.
const uint32_t kDefaultBuildIDEventPid = ~static_cast<uint32_t>(0);
73
74 template <class T>
ByteSwap(T * input)75 void ByteSwap(T* input) {
76 switch (sizeof(T)) {
77 case sizeof(uint8_t):
78 LOG(WARNING) << "Attempting to byte swap on a single byte.";
79 break;
80 case sizeof(uint16_t):
81 *input = bswap_16(*input);
82 break;
83 case sizeof(uint32_t):
84 *input = bswap_32(*input);
85 break;
86 case sizeof(uint64_t):
87 *input = bswap_64(*input);
88 break;
89 default:
90 LOG(FATAL) << "Invalid size for byte swap: " << sizeof(T) << " bytes";
91 break;
92 }
93 }
94
MaybeSwap(u64 value,bool swap)95 u64 MaybeSwap(u64 value, bool swap) {
96 if (swap)
97 return bswap_64(value);
98 return value;
99 }
100
MaybeSwap(u32 value,bool swap)101 u32 MaybeSwap(u32 value, bool swap) {
102 if (swap)
103 return bswap_32(value);
104 return value;
105 }
106
ReverseByte(u8 x)107 u8 ReverseByte(u8 x) {
108 x = (x & 0xf0) >> 4 | (x & 0x0f) << 4; // exchange nibbles
109 x = (x & 0xcc) >> 2 | (x & 0x33) << 2; // exchange pairs
110 x = (x & 0xaa) >> 1 | (x & 0x55) << 1; // exchange neighbors
111 return x;
112 }
113
114 // If field points to the start of a bitfield padded to len bytes, this
115 // performs an endian swap of the bitfield, assuming the compiler that produced
116 // it conforms to the same ABI (bitfield layout is not completely specified by
117 // the language).
SwapBitfieldOfBits(u8 * field,size_t len)118 void SwapBitfieldOfBits(u8* field, size_t len) {
119 for (size_t i = 0; i < len; i++) {
120 field[i] = ReverseByte(field[i]);
121 }
122 }
123
124 // The code currently assumes that the compiler will not add any padding to the
125 // various structs. These CHECKs make sure that this is true.
CheckNoEventHeaderPadding()126 void CheckNoEventHeaderPadding() {
127 perf_event_header header;
128 CHECK_EQ(sizeof(header),
129 sizeof(header.type) + sizeof(header.misc) + sizeof(header.size));
130 }
131
CheckNoPerfEventAttrPadding()132 void CheckNoPerfEventAttrPadding() {
133 perf_event_attr attr;
134 CHECK_EQ(sizeof(attr),
135 (reinterpret_cast<u64>(&attr.__reserved_2) -
136 reinterpret_cast<u64>(&attr)) +
137 sizeof(attr.__reserved_2));
138 }
139
CheckNoEventTypePadding()140 void CheckNoEventTypePadding() {
141 perf_trace_event_type event_type;
142 CHECK_EQ(sizeof(event_type),
143 sizeof(event_type.event_id) + sizeof(event_type.name));
144 }
145
CheckNoBuildIDEventPadding()146 void CheckNoBuildIDEventPadding() {
147 build_id_event event;
148 CHECK_EQ(sizeof(event),
149 sizeof(event.header.type) + sizeof(event.header.misc) +
150 sizeof(event.header.size) + sizeof(event.pid) +
151 sizeof(event.build_id));
152 }
153
154 // Creates/updates a build id event with |build_id| and |filename|.
155 // Passing "" to |build_id| or |filename| will leave the corresponding field
156 // unchanged (in which case |event| must be non-null).
157 // If |event| is null or is not large enough, a new event will be created.
158 // In this case, if |event| is non-null, it will be freed.
159 // Otherwise, updates the fields of the existing event.
160 // |new_misc| indicates kernel vs user space, and is only used to fill in the
161 // |header.misc| field of new events.
162 // In either case, returns a pointer to the event containing the updated data,
163 // or NULL in the case of a failure.
CreateOrUpdateBuildID(const string & build_id,const string & filename,uint16_t new_misc,build_id_event * event)164 build_id_event* CreateOrUpdateBuildID(const string& build_id,
165 const string& filename,
166 uint16_t new_misc,
167 build_id_event* event) {
168 // When creating an event from scratch, build id and filename must be present.
169 if (!event && (build_id.empty() || filename.empty()))
170 return NULL;
171 size_t new_len = GetUint64AlignedStringLength(
172 filename.empty() ? event->filename : filename);
173
174 // If event is null, or we don't have enough memory, allocate more memory, and
175 // switch the new pointer with the existing pointer.
176 size_t new_size = sizeof(*event) + new_len;
177 if (!event || new_size > event->header.size) {
178 build_id_event* new_event = CallocMemoryForBuildID(new_size);
179
180 if (event) {
181 // Copy over everything except the filename and free the event.
182 // It is guaranteed that we are changing the filename - otherwise, the old
183 // size and the new size would be equal.
184 *new_event = *event;
185 free(event);
186 } else {
187 // Fill in the fields appropriately.
188 new_event->header.type = HEADER_BUILD_ID;
189 new_event->header.misc = new_misc;
190 new_event->pid = kDefaultBuildIDEventPid;
191 }
192 event = new_event;
193 }
194
195 // Here, event is the pointer to the build_id_event that we are keeping.
196 // Update the event's size, build id, and filename.
197 if (!build_id.empty() &&
198 !StringToHex(build_id, event->build_id, arraysize(event->build_id))) {
199 free(event);
200 return NULL;
201 }
202
203 if (!filename.empty())
204 CHECK_GT(snprintf(event->filename, new_len, "%s", filename.c_str()), 0);
205
206 event->header.size = new_size;
207 return event;
208 }
209
210 // Reads |size| bytes from |buffer| into |dest| and advances |src_offset|.
ReadDataFromBuffer(const ConstBufferWithSize & buffer,size_t size,const string & value_name,size_t * src_offset,void * dest)211 bool ReadDataFromBuffer(const ConstBufferWithSize& buffer,
212 size_t size,
213 const string& value_name,
214 size_t* src_offset,
215 void* dest) {
216 size_t end_offset = *src_offset + size / sizeof(*buffer.ptr);
217 if (buffer.size < end_offset) {
218 LOG(ERROR) << "Not enough bytes to read " << value_name
219 << ". Requested " << size << " bytes";
220 return false;
221 }
222 memcpy(dest, buffer.ptr + *src_offset, size);
223 *src_offset = end_offset;
224 return true;
225 }
226
227 // Reads a CStringWithLength from |buffer| into |dest|, and advances the offset.
ReadStringFromBuffer(const ConstBufferWithSize & buffer,bool is_cross_endian,size_t * offset,CStringWithLength * dest)228 bool ReadStringFromBuffer(const ConstBufferWithSize& buffer,
229 bool is_cross_endian,
230 size_t* offset,
231 CStringWithLength* dest) {
232 if (!ReadDataFromBuffer(buffer, sizeof(dest->len), "string length",
233 offset, &dest->len)) {
234 return false;
235 }
236 if (is_cross_endian)
237 ByteSwap(&dest->len);
238
239 if (buffer.size < *offset + dest->len) {
240 LOG(ERROR) << "Not enough bytes to read string";
241 return false;
242 }
243 dest->str = string(buffer.ptr + *offset);
244 *offset += dest->len / sizeof(*buffer.ptr);
245 return true;
246 }
247
248 // Read read info from perf data. Corresponds to sample format type
249 // PERF_SAMPLE_READ.
ReadReadInfo(const uint64_t * array,bool swap_bytes,uint64_t read_format,struct perf_sample * sample)250 const uint64_t* ReadReadInfo(const uint64_t* array,
251 bool swap_bytes,
252 uint64_t read_format,
253 struct perf_sample* sample) {
254 if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
255 sample->read.time_enabled = *array++;
256 if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
257 sample->read.time_running = *array++;
258 if (read_format & PERF_FORMAT_ID)
259 sample->read.one.id = *array++;
260
261 if (swap_bytes) {
262 ByteSwap(&sample->read.time_enabled);
263 ByteSwap(&sample->read.time_running);
264 ByteSwap(&sample->read.one.id);
265 }
266
267 return array;
268 }
269
270 // Read call chain info from perf data. Corresponds to sample format type
271 // PERF_SAMPLE_CALLCHAIN.
ReadCallchain(const uint64_t * array,bool swap_bytes,struct perf_sample * sample)272 const uint64_t* ReadCallchain(const uint64_t* array,
273 bool swap_bytes,
274 struct perf_sample* sample) {
275 // Make sure there is no existing allocated memory in |sample->callchain|.
276 CHECK_EQ(static_cast<void*>(NULL), sample->callchain);
277
278 // The callgraph data consists of a uint64_t value |nr| followed by |nr|
279 // addresses.
280 uint64_t callchain_size = *array++;
281 if (swap_bytes)
282 ByteSwap(&callchain_size);
283 struct ip_callchain* callchain =
284 reinterpret_cast<struct ip_callchain*>(new uint64_t[callchain_size + 1]);
285 callchain->nr = callchain_size;
286 for (size_t i = 0; i < callchain_size; ++i) {
287 callchain->ips[i] = *array++;
288 if (swap_bytes)
289 ByteSwap(&callchain->ips[i]);
290 }
291 sample->callchain = callchain;
292
293 return array;
294 }
295
296 // Read raw info from perf data. Corresponds to sample format type
297 // PERF_SAMPLE_RAW.
ReadRawData(const uint64_t * array,bool swap_bytes,struct perf_sample * sample)298 const uint64_t* ReadRawData(const uint64_t* array,
299 bool swap_bytes,
300 struct perf_sample* sample) {
301 // First read the size.
302 const uint32_t* ptr = reinterpret_cast<const uint32_t*>(array);
303 sample->raw_size = *ptr++;
304 if (swap_bytes)
305 ByteSwap(&sample->raw_size);
306
307 // Allocate space for and read the raw data bytes.
308 sample->raw_data = new uint8_t[sample->raw_size];
309 memcpy(sample->raw_data, ptr, sample->raw_size);
310
311 // Determine the bytes that were read, and align to the next 64 bits.
312 int bytes_read = AlignSize(sizeof(sample->raw_size) + sample->raw_size,
313 sizeof(uint64_t));
314 array += bytes_read / sizeof(uint64_t);
315
316 return array;
317 }
318
319 // Read call chain info from perf data. Corresponds to sample format type
320 // PERF_SAMPLE_CALLCHAIN.
ReadBranchStack(const uint64_t * array,bool swap_bytes,struct perf_sample * sample)321 const uint64_t* ReadBranchStack(const uint64_t* array,
322 bool swap_bytes,
323 struct perf_sample* sample) {
324 // Make sure there is no existing allocated memory in
325 // |sample->branch_stack|.
326 CHECK_EQ(static_cast<void*>(NULL), sample->branch_stack);
327
328 // The branch stack data consists of a uint64_t value |nr| followed by |nr|
329 // branch_entry structs.
330 uint64_t branch_stack_size = *array++;
331 if (swap_bytes)
332 ByteSwap(&branch_stack_size);
333 struct branch_stack* branch_stack =
334 reinterpret_cast<struct branch_stack*>(
335 new uint8_t[sizeof(uint64_t) +
336 branch_stack_size * sizeof(struct branch_entry)]);
337 branch_stack->nr = branch_stack_size;
338 for (size_t i = 0; i < branch_stack_size; ++i) {
339 memcpy(&branch_stack->entries[i], array, sizeof(struct branch_entry));
340 array += sizeof(struct branch_entry) / sizeof(*array);
341 if (swap_bytes) {
342 ByteSwap(&branch_stack->entries[i].from);
343 ByteSwap(&branch_stack->entries[i].to);
344 }
345 }
346 sample->branch_stack = branch_stack;
347
348 return array;
349 }
350
ReadPerfSampleFromData(const perf_event_type event_type,const uint64_t * array,const uint64_t sample_fields,const uint64_t read_format,bool swap_bytes,struct perf_sample * sample)351 size_t ReadPerfSampleFromData(const perf_event_type event_type,
352 const uint64_t* array,
353 const uint64_t sample_fields,
354 const uint64_t read_format,
355 bool swap_bytes,
356 struct perf_sample* sample) {
357 const uint64_t* initial_array_ptr = array;
358
359 union {
360 uint32_t val32[sizeof(uint64_t) / sizeof(uint32_t)];
361 uint64_t val64;
362 };
363
364 // See structure for PERF_RECORD_SAMPLE in kernel/perf_event.h
365 // and compare sample_id when sample_id_all is set.
366
367 // NB: For sample_id, sample_fields has already been masked to the set
368 // of fields in that struct by GetSampleFieldsForEventType. That set
369 // of fields is mostly in the same order as PERF_RECORD_SAMPLE, with
370 // the exception of PERF_SAMPLE_IDENTIFIER.
371
372 // PERF_SAMPLE_IDENTIFIER is in a different location depending on
373 // if this is a SAMPLE event or the sample_id of another event.
374 if (event_type == PERF_RECORD_SAMPLE) {
375 // { u64 id; } && PERF_SAMPLE_IDENTIFIER
376 if (sample_fields & PERF_SAMPLE_IDENTIFIER) {
377 sample->id = MaybeSwap(*array++, swap_bytes);
378 }
379 }
380
381 // { u64 ip; } && PERF_SAMPLE_IP
382 if (sample_fields & PERF_SAMPLE_IP) {
383 sample->ip = MaybeSwap(*array++, swap_bytes);
384 }
385
386 // { u32 pid, tid; } && PERF_SAMPLE_TID
387 if (sample_fields & PERF_SAMPLE_TID) {
388 val64 = *array++;
389 sample->pid = MaybeSwap(val32[0], swap_bytes);
390 sample->tid = MaybeSwap(val32[1], swap_bytes);
391 }
392
393 // { u64 time; } && PERF_SAMPLE_TIME
394 if (sample_fields & PERF_SAMPLE_TIME) {
395 sample->time = MaybeSwap(*array++, swap_bytes);
396 }
397
398 // { u64 addr; } && PERF_SAMPLE_ADDR
399 if (sample_fields & PERF_SAMPLE_ADDR) {
400 sample->addr = MaybeSwap(*array++, swap_bytes);
401 }
402
403 // { u64 id; } && PERF_SAMPLE_ID
404 if (sample_fields & PERF_SAMPLE_ID) {
405 sample->id = MaybeSwap(*array++, swap_bytes);
406 }
407
408 // { u64 stream_id;} && PERF_SAMPLE_STREAM_ID
409 if (sample_fields & PERF_SAMPLE_STREAM_ID) {
410 sample->stream_id = MaybeSwap(*array++, swap_bytes);
411 }
412
413 // { u32 cpu, res; } && PERF_SAMPLE_CPU
414 if (sample_fields & PERF_SAMPLE_CPU) {
415 val64 = *array++;
416 sample->cpu = MaybeSwap(val32[0], swap_bytes);
417 // sample->res = MaybeSwap(*val32[1], swap_bytes); // not implemented?
418 }
419
420 // This is the location of PERF_SAMPLE_IDENTIFIER in struct sample_id.
421 if (event_type != PERF_RECORD_SAMPLE) {
422 // { u64 id; } && PERF_SAMPLE_IDENTIFIER
423 if (sample_fields & PERF_SAMPLE_IDENTIFIER) {
424 sample->id = MaybeSwap(*array++, swap_bytes);
425 }
426 }
427
428 //
429 // The remaining fields are only in PERF_RECORD_SAMPLE
430 //
431
432 // { u64 period; } && PERF_SAMPLE_PERIOD
433 if (sample_fields & PERF_SAMPLE_PERIOD) {
434 sample->period = MaybeSwap(*array++, swap_bytes);
435 }
436
437 // { struct read_format values; } && PERF_SAMPLE_READ
438 if (sample_fields & PERF_SAMPLE_READ) {
439 // TODO(cwp-team): support grouped read info.
440 if (read_format & PERF_FORMAT_GROUP)
441 return 0;
442 array = ReadReadInfo(array, swap_bytes, read_format, sample);
443 }
444
445 // { u64 nr,
446 // u64 ips[nr]; } && PERF_SAMPLE_CALLCHAIN
447 if (sample_fields & PERF_SAMPLE_CALLCHAIN) {
448 array = ReadCallchain(array, swap_bytes, sample);
449 }
450
451 // { u32 size;
452 // char data[size];}&& PERF_SAMPLE_RAW
453 if (sample_fields & PERF_SAMPLE_RAW) {
454 array = ReadRawData(array, swap_bytes, sample);
455 }
456
457 // { u64 nr;
458 // { u64 from, to, flags } lbr[nr];} && PERF_SAMPLE_BRANCH_STACK
459 if (sample_fields & PERF_SAMPLE_BRANCH_STACK) {
460 array = ReadBranchStack(array, swap_bytes, sample);
461 }
462
463 static const u64 kUnimplementedSampleFields =
464 PERF_SAMPLE_REGS_USER |
465 PERF_SAMPLE_STACK_USER |
466 PERF_SAMPLE_WEIGHT |
467 PERF_SAMPLE_DATA_SRC |
468 PERF_SAMPLE_TRANSACTION;
469
470 if (sample_fields & kUnimplementedSampleFields) {
471 LOG(WARNING) << "Unimplemented sample fields 0x"
472 << std::hex << (sample_fields & kUnimplementedSampleFields);
473 }
474
475 if (sample_fields & ~(PERF_SAMPLE_MAX-1)) {
476 LOG(WARNING) << "Unrecognized sample fields 0x"
477 << std::hex << (sample_fields & ~(PERF_SAMPLE_MAX-1));
478 }
479
480 return (array - initial_array_ptr) * sizeof(uint64_t);
481 }
482
WritePerfSampleToData(const perf_event_type event_type,const struct perf_sample & sample,const uint64_t sample_fields,const uint64_t read_format,uint64_t * array)483 size_t WritePerfSampleToData(const perf_event_type event_type,
484 const struct perf_sample& sample,
485 const uint64_t sample_fields,
486 const uint64_t read_format,
487 uint64_t* array) {
488 const uint64_t* initial_array_ptr = array;
489
490 union {
491 uint32_t val32[sizeof(uint64_t) / sizeof(uint32_t)];
492 uint64_t val64;
493 };
494
495 // See notes at the top of ReadPerfSampleFromData regarding the structure
496 // of PERF_RECORD_SAMPLE, sample_id, and PERF_SAMPLE_IDENTIFIER, as they
497 // all apply here as well.
498
499 // PERF_SAMPLE_IDENTIFIER is in a different location depending on
500 // if this is a SAMPLE event or the sample_id of another event.
501 if (event_type == PERF_RECORD_SAMPLE) {
502 // { u64 id; } && PERF_SAMPLE_IDENTIFIER
503 if (sample_fields & PERF_SAMPLE_IDENTIFIER) {
504 *array++ = sample.id;
505 }
506 }
507
508 // { u64 ip; } && PERF_SAMPLE_IP
509 if (sample_fields & PERF_SAMPLE_IP) {
510 *array++ = sample.ip;
511 }
512
513 // { u32 pid, tid; } && PERF_SAMPLE_TID
514 if (sample_fields & PERF_SAMPLE_TID) {
515 val32[0] = sample.pid;
516 val32[1] = sample.tid;
517 *array++ = val64;
518 }
519
520 // { u64 time; } && PERF_SAMPLE_TIME
521 if (sample_fields & PERF_SAMPLE_TIME) {
522 *array++ = sample.time;
523 }
524
525 // { u64 addr; } && PERF_SAMPLE_ADDR
526 if (sample_fields & PERF_SAMPLE_ADDR) {
527 *array++ = sample.addr;
528 }
529
530 // { u64 id; } && PERF_SAMPLE_ID
531 if (sample_fields & PERF_SAMPLE_ID) {
532 *array++ = sample.id;
533 }
534
535 // { u64 stream_id;} && PERF_SAMPLE_STREAM_ID
536 if (sample_fields & PERF_SAMPLE_STREAM_ID) {
537 *array++ = sample.stream_id;
538 }
539
540 // { u32 cpu, res; } && PERF_SAMPLE_CPU
541 if (sample_fields & PERF_SAMPLE_CPU) {
542 val32[0] = sample.cpu;
543 // val32[1] = sample.res; // not implemented?
544 val32[1] = 0;
545 *array++ = val64;
546 }
547
548 // This is the location of PERF_SAMPLE_IDENTIFIER in struct sample_id.
549 if (event_type != PERF_RECORD_SAMPLE) {
550 // { u64 id; } && PERF_SAMPLE_IDENTIFIER
551 if (sample_fields & PERF_SAMPLE_IDENTIFIER) {
552 *array++ = sample.id;
553 }
554 }
555
556 //
557 // The remaining fields are only in PERF_RECORD_SAMPLE
558 //
559
560 // { u64 period; } && PERF_SAMPLE_PERIOD
561 if (sample_fields & PERF_SAMPLE_PERIOD) {
562 *array++ = sample.period;
563 }
564
565 // { struct read_format values; } && PERF_SAMPLE_READ
566 if (sample_fields & PERF_SAMPLE_READ) {
567 // TODO(cwp-team): support grouped read info.
568 if (read_format & PERF_FORMAT_GROUP)
569 return 0;
570 if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
571 *array++ = sample.read.time_enabled;
572 if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
573 *array++ = sample.read.time_running;
574 if (read_format & PERF_FORMAT_ID)
575 *array++ = sample.read.one.id;
576 }
577
578 // { u64 nr,
579 // u64 ips[nr]; } && PERF_SAMPLE_CALLCHAIN
580 if (sample_fields & PERF_SAMPLE_CALLCHAIN) {
581 if (!sample.callchain) {
582 LOG(ERROR) << "Expecting callchain data, but none was found.";
583 } else {
584 *array++ = sample.callchain->nr;
585 for (size_t i = 0; i < sample.callchain->nr; ++i)
586 *array++ = sample.callchain->ips[i];
587 }
588 }
589
590 // { u32 size;
591 // char data[size];}&& PERF_SAMPLE_RAW
592 if (sample_fields & PERF_SAMPLE_RAW) {
593 uint32_t* ptr = reinterpret_cast<uint32_t*>(array);
594 *ptr++ = sample.raw_size;
595 memcpy(ptr, sample.raw_data, sample.raw_size);
596
597 // Update the data read pointer after aligning to the next 64 bytes.
598 int num_bytes = AlignSize(sizeof(sample.raw_size) + sample.raw_size,
599 sizeof(uint64_t));
600 array += num_bytes / sizeof(uint64_t);
601 }
602
603 // { u64 nr;
604 // { u64 from, to, flags } lbr[nr];} && PERF_SAMPLE_BRANCH_STACK
605 if (sample_fields & PERF_SAMPLE_BRANCH_STACK) {
606 if (!sample.branch_stack) {
607 LOG(ERROR) << "Expecting branch stack data, but none was found.";
608 } else {
609 *array++ = sample.branch_stack->nr;
610 for (size_t i = 0; i < sample.branch_stack->nr; ++i) {
611 *array++ = sample.branch_stack->entries[i].from;
612 *array++ = sample.branch_stack->entries[i].to;
613 memcpy(array++, &sample.branch_stack->entries[i].flags,
614 sizeof(uint64_t));
615 }
616 }
617 }
618
619 return (array - initial_array_ptr) * sizeof(uint64_t);
620 }
621
622 } // namespace
623
~PerfReader()624 PerfReader::~PerfReader() {
625 // Free allocated memory.
626 for (size_t i = 0; i < build_id_events_.size(); ++i)
627 if (build_id_events_[i])
628 free(build_id_events_[i]);
629 }
630
PerfizeBuildIDString(string * build_id)631 void PerfReader::PerfizeBuildIDString(string* build_id) {
632 build_id->resize(kBuildIDStringLength, '0');
633 }
634
UnperfizeBuildIDString(string * build_id)635 void PerfReader::UnperfizeBuildIDString(string* build_id) {
636 const size_t kPaddingSize = 8;
637 const string kBuildIDPadding = string(kPaddingSize, '0');
638
639 // Remove kBuildIDPadding from the end of build_id until we cannot remove any
640 // more, or removing more would cause the build id to be empty.
641 while (build_id->size() > kPaddingSize &&
642 build_id->substr(build_id->size() - kPaddingSize) == kBuildIDPadding) {
643 build_id->resize(build_id->size() - kPaddingSize);
644 }
645 }
646
ReadFile(const string & filename)647 bool PerfReader::ReadFile(const string& filename) {
648 std::vector<char> data;
649 if (!ReadFileToData(filename, &data))
650 return false;
651 return ReadFromVector(data);
652 }
653
ReadFromVector(const std::vector<char> & data)654 bool PerfReader::ReadFromVector(const std::vector<char>& data) {
655 return ReadFromPointer(&data[0], data.size());
656 }
657
ReadFromString(const string & str)658 bool PerfReader::ReadFromString(const string& str) {
659 return ReadFromPointer(str.c_str(), str.size());
660 }
661
ReadFromPointer(const char * perf_data,size_t size)662 bool PerfReader::ReadFromPointer(const char* perf_data, size_t size) {
663 const ConstBufferWithSize data = { perf_data, size };
664
665 if (data.size == 0)
666 return false;
667 if (!ReadHeader(data))
668 return false;
669
670 // Check if it is normal perf data.
671 if (header_.size == sizeof(header_)) {
672 DLOG(INFO) << "Perf data is in normal format.";
673 metadata_mask_ = header_.adds_features[0];
674 return (ReadAttrs(data) && ReadEventTypes(data) && ReadData(data)
675 && ReadMetadata(data));
676 }
677
678 // Otherwise it is piped data.
679 LOG(ERROR) << "Internal error: no support for piped data";
680 return false;
681 }
682
Localize(const std::map<string,string> & build_ids_to_filenames)683 bool PerfReader::Localize(
684 const std::map<string, string>& build_ids_to_filenames) {
685 std::map<string, string> perfized_build_ids_to_filenames;
686 std::map<string, string>::const_iterator it;
687 for (it = build_ids_to_filenames.begin();
688 it != build_ids_to_filenames.end();
689 ++it) {
690 string build_id = it->first;
691 PerfizeBuildIDString(&build_id);
692 perfized_build_ids_to_filenames[build_id] = it->second;
693 }
694
695 std::map<string, string> filename_map;
696 for (size_t i = 0; i < build_id_events_.size(); ++i) {
697 build_id_event* event = build_id_events_[i];
698 string build_id = HexToString(event->build_id, kBuildIDArraySize);
699 if (perfized_build_ids_to_filenames.find(build_id) ==
700 perfized_build_ids_to_filenames.end()) {
701 continue;
702 }
703
704 string new_name = perfized_build_ids_to_filenames.at(build_id);
705 filename_map[string(event->filename)] = new_name;
706 build_id_event* new_event = CreateOrUpdateBuildID("", new_name, 0, event);
707 CHECK(new_event);
708 build_id_events_[i] = new_event;
709 }
710
711 LocalizeUsingFilenames(filename_map);
712 return true;
713 }
714
LocalizeUsingFilenames(const std::map<string,string> & filename_map)715 bool PerfReader::LocalizeUsingFilenames(
716 const std::map<string, string>& filename_map) {
717 LocalizeMMapFilenames(filename_map);
718 for (size_t i = 0; i < build_id_events_.size(); ++i) {
719 build_id_event* event = build_id_events_[i];
720 string old_name = event->filename;
721
722 if (filename_map.find(event->filename) != filename_map.end()) {
723 const string& new_name = filename_map.at(old_name);
724 build_id_event* new_event = CreateOrUpdateBuildID("", new_name, 0, event);
725 CHECK(new_event);
726 build_id_events_[i] = new_event;
727 }
728 }
729 return true;
730 }
731
GetFilenames(std::vector<string> * filenames) const732 void PerfReader::GetFilenames(std::vector<string>* filenames) const {
733 std::set<string> filename_set;
734 GetFilenamesAsSet(&filename_set);
735 filenames->clear();
736 filenames->insert(filenames->begin(), filename_set.begin(),
737 filename_set.end());
738 }
739
GetFilenamesAsSet(std::set<string> * filenames) const740 void PerfReader::GetFilenamesAsSet(std::set<string>* filenames) const {
741 filenames->clear();
742 for (size_t i = 0; i < events_.size(); ++i) {
743 const event_t& event = *events_[i];
744 if (event.header.type == PERF_RECORD_MMAP)
745 filenames->insert(event.mmap.filename);
746 if (event.header.type == PERF_RECORD_MMAP2)
747 filenames->insert(event.mmap2.filename);
748 }
749 }
750
GetFilenamesToBuildIDs(std::map<string,string> * filenames_to_build_ids) const751 void PerfReader::GetFilenamesToBuildIDs(
752 std::map<string, string>* filenames_to_build_ids) const {
753 filenames_to_build_ids->clear();
754 for (size_t i = 0; i < build_id_events_.size(); ++i) {
755 const build_id_event& event = *build_id_events_[i];
756 string build_id = HexToString(event.build_id, kBuildIDArraySize);
757 (*filenames_to_build_ids)[event.filename] = build_id;
758 }
759 }
760
IsSupportedEventType(uint32_t type)761 bool PerfReader::IsSupportedEventType(uint32_t type) {
762 switch (type) {
763 case PERF_RECORD_SAMPLE:
764 case PERF_RECORD_MMAP:
765 case PERF_RECORD_MMAP2:
766 case PERF_RECORD_FORK:
767 case PERF_RECORD_EXIT:
768 case PERF_RECORD_COMM:
769 case PERF_RECORD_LOST:
770 case PERF_RECORD_THROTTLE:
771 case PERF_RECORD_UNTHROTTLE:
772 return true;
773 case PERF_RECORD_READ:
774 case PERF_RECORD_MAX:
775 return false;
776 default:
777 LOG(FATAL) << "Unknown event type " << type;
778 return false;
779 }
780 }
781
ReadPerfSampleInfo(const event_t & event,struct perf_sample * sample) const782 bool PerfReader::ReadPerfSampleInfo(const event_t& event,
783 struct perf_sample* sample) const {
784 CHECK(sample);
785
786 if (!IsSupportedEventType(event.header.type)) {
787 LOG(ERROR) << "Unsupported event type " << event.header.type;
788 return false;
789 }
790
791 uint64_t sample_format = GetSampleFieldsForEventType(event.header.type,
792 sample_type_);
793 uint64_t offset = GetPerfSampleDataOffset(event);
794 size_t size_read = ReadPerfSampleFromData(
795 static_cast<perf_event_type>(event.header.type),
796 reinterpret_cast<const uint64_t*>(&event) + offset / sizeof(uint64_t),
797 sample_format,
798 read_format_,
799 is_cross_endian_,
800 sample);
801
802 size_t expected_size = event.header.size - offset;
803 if (size_read != expected_size) {
804 LOG(ERROR) << "Read " << size_read << " bytes, expected "
805 << expected_size << " bytes.";
806 }
807
808 return (size_read == expected_size);
809 }
810
WritePerfSampleInfo(const perf_sample & sample,event_t * event) const811 bool PerfReader::WritePerfSampleInfo(const perf_sample& sample,
812 event_t* event) const {
813 CHECK(event);
814
815 if (!IsSupportedEventType(event->header.type)) {
816 LOG(ERROR) << "Unsupported event type " << event->header.type;
817 return false;
818 }
819
820 uint64_t sample_format = GetSampleFieldsForEventType(event->header.type,
821 sample_type_);
822 uint64_t offset = GetPerfSampleDataOffset(*event);
823
824 size_t expected_size = event->header.size - offset;
825 memset(reinterpret_cast<uint8_t*>(event) + offset, 0, expected_size);
826 size_t size_written = WritePerfSampleToData(
827 static_cast<perf_event_type>(event->header.type),
828 sample,
829 sample_format,
830 read_format_,
831 reinterpret_cast<uint64_t*>(event) + offset / sizeof(uint64_t));
832 if (size_written != expected_size) {
833 LOG(ERROR) << "Wrote " << size_written << " bytes, expected "
834 << expected_size << " bytes.";
835 }
836
837 return (size_written == expected_size);
838 }
839
ReadHeader(const ConstBufferWithSize & data)840 bool PerfReader::ReadHeader(const ConstBufferWithSize& data) {
841 CheckNoEventHeaderPadding();
842 size_t offset = 0;
843 if (!ReadDataFromBuffer(data, sizeof(piped_header_), "header magic",
844 &offset, &piped_header_)) {
845 return false;
846 }
847 if (piped_header_.magic != kPerfMagic &&
848 piped_header_.magic != bswap_64(kPerfMagic)) {
849 LOG(ERROR) << "Read wrong magic. Expected: 0x" << std::hex << kPerfMagic
850 << " or 0x" << std::hex << bswap_64(kPerfMagic)
851 << " Got: 0x" << std::hex << piped_header_.magic;
852 return false;
853 }
854 is_cross_endian_ = (piped_header_.magic != kPerfMagic);
855 if (is_cross_endian_)
856 ByteSwap(&piped_header_.size);
857
858 // Header can be a piped header.
859 if (piped_header_.size == sizeof(piped_header_))
860 return true;
861
862 // Re-read full header
863 offset = 0;
864 if (!ReadDataFromBuffer(data, sizeof(header_), "header data",
865 &offset, &header_)) {
866 return false;
867 }
868 if (is_cross_endian_)
869 ByteSwap(&header_.size);
870
871 DLOG(INFO) << "event_types.size: " << header_.event_types.size;
872 DLOG(INFO) << "event_types.offset: " << header_.event_types.offset;
873
874 return true;
875 }
876
ReadAttrs(const ConstBufferWithSize & data)877 bool PerfReader::ReadAttrs(const ConstBufferWithSize& data) {
878 size_t num_attrs = header_.attrs.size / header_.attr_size;
879 size_t offset = header_.attrs.offset;
880 for (size_t i = 0; i < num_attrs; i++) {
881 if (!ReadAttr(data, &offset))
882 return false;
883 }
884 return true;
885 }
886
ReadAttr(const ConstBufferWithSize & data,size_t * offset)887 bool PerfReader::ReadAttr(const ConstBufferWithSize& data, size_t* offset) {
888 PerfFileAttr attr;
889 if (!ReadEventAttr(data, offset, &attr.attr))
890 return false;
891
892 perf_file_section ids;
893 if (!ReadDataFromBuffer(data, sizeof(ids), "ID section info", offset, &ids))
894 return false;
895 if (is_cross_endian_) {
896 ByteSwap(&ids.offset);
897 ByteSwap(&ids.size);
898 }
899
900 size_t num_ids = ids.size / sizeof(decltype(attr.ids)::value_type);
901 // Convert the offset from u64 to size_t.
902 size_t ids_offset = ids.offset;
903 if (!ReadUniqueIDs(data, num_ids, &ids_offset, &attr.ids))
904 return false;
905 attrs_.push_back(attr);
906 return true;
907 }
908
ReadPerfEventAttrSize(const ConstBufferWithSize & data,size_t attr_offset)909 u32 PerfReader::ReadPerfEventAttrSize(const ConstBufferWithSize& data,
910 size_t attr_offset) {
911 static_assert(std::is_same<decltype(perf_event_attr::size), u32>::value,
912 "ReadPerfEventAttrSize return type should match "
913 "perf_event_attr.size");
914 u32 attr_size;
915 size_t attr_size_offset = attr_offset + offsetof(perf_event_attr, size);
916 if (!ReadDataFromBuffer(data, sizeof(perf_event_attr::size),
917 "attr.size", &attr_size_offset, &attr_size)) {
918 return kuint32max;
919 }
920 return MaybeSwap(attr_size, is_cross_endian_);
921 }
922
ReadEventAttr(const ConstBufferWithSize & data,size_t * offset,perf_event_attr * attr)923 bool PerfReader::ReadEventAttr(const ConstBufferWithSize& data, size_t* offset,
924 perf_event_attr* attr) {
925 CheckNoPerfEventAttrPadding();
926
927 std::memset(attr, 0, sizeof(*attr));
928 //*attr = {0};
929
930 // read just size first
931 u32 attr_size = ReadPerfEventAttrSize(data, *offset);
932 if (attr_size == kuint32max) {
933 return false;
934 }
935
936 // now read the the struct.
937 if (!ReadDataFromBuffer(data, attr_size, "attribute", offset,
938 reinterpret_cast<char*>(attr))) {
939 return false;
940 }
941
942 if (is_cross_endian_) {
943 // Depending on attr->size, some of these might not have actually been
944 // read. This is okay: they are zero.
945 ByteSwap(&attr->type);
946 ByteSwap(&attr->size);
947 ByteSwap(&attr->config);
948 ByteSwap(&attr->sample_period);
949 ByteSwap(&attr->sample_type);
950 ByteSwap(&attr->read_format);
951
952 // NB: This will also reverse precise_ip : 2 as if it was two fields:
953 auto *const bitfield_start = &attr->read_format + 1;
954 SwapBitfieldOfBits(reinterpret_cast<u8*>(bitfield_start),
955 sizeof(u64));
956 // ... So swap it back:
957 const auto tmp = attr->precise_ip;
958 attr->precise_ip = (tmp & 0x2) >> 1 | (tmp & 0x1) << 1;
959
960 ByteSwap(&attr->wakeup_events); // union with wakeup_watermark
961 ByteSwap(&attr->bp_type);
962 ByteSwap(&attr->bp_addr); // union with config1
963 ByteSwap(&attr->bp_len); // union with config2
964 ByteSwap(&attr->branch_sample_type);
965 ByteSwap(&attr->sample_regs_user);
966 ByteSwap(&attr->sample_stack_user);
967 }
968
969 CHECK_EQ(attr_size, attr->size);
970 // The actual perf_event_attr data size might be different from the size of
971 // the struct definition. Check against perf_event_attr's |size| field.
972 attr->size = sizeof(*attr);
973
974 // Assign sample type if it hasn't been assigned, otherwise make sure all
975 // subsequent attributes have the same sample type bits set.
976 if (sample_type_ == 0) {
977 sample_type_ = attr->sample_type;
978 } else {
979 CHECK_EQ(sample_type_, attr->sample_type)
980 << "Event type sample format does not match sample format of other "
981 << "event type.";
982 }
983
984 if (read_format_ == 0) {
985 read_format_ = attr->read_format;
986 } else {
987 CHECK_EQ(read_format_, attr->read_format)
988 << "Event type read format does not match read format of other event "
989 << "types.";
990 }
991
992 return true;
993 }
994
ReadUniqueIDs(const ConstBufferWithSize & data,size_t num_ids,size_t * offset,std::vector<u64> * ids)995 bool PerfReader::ReadUniqueIDs(const ConstBufferWithSize& data, size_t num_ids,
996 size_t* offset, std::vector<u64>* ids) {
997 ids->resize(num_ids);
998 for (size_t j = 0; j < num_ids; j++) {
999 if (!ReadDataFromBuffer(data, sizeof(ids->at(j)), "ID", offset,
1000 &ids->at(j))) {
1001 return false;
1002 }
1003 if (is_cross_endian_)
1004 ByteSwap(&ids->at(j));
1005 }
1006 return true;
1007 }
1008
ReadEventTypes(const ConstBufferWithSize & data)1009 bool PerfReader::ReadEventTypes(const ConstBufferWithSize& data) {
1010 size_t num_event_types = header_.event_types.size /
1011 sizeof(struct perf_trace_event_type);
1012 CHECK_EQ(sizeof(perf_trace_event_type) * num_event_types,
1013 header_.event_types.size);
1014 size_t offset = header_.event_types.offset;
1015 for (size_t i = 0; i < num_event_types; ++i) {
1016 if (!ReadEventType(data, &offset))
1017 return false;
1018 }
1019 return true;
1020 }
1021
ReadEventType(const ConstBufferWithSize & data,size_t * offset)1022 bool PerfReader::ReadEventType(const ConstBufferWithSize& data,
1023 size_t* offset) {
1024 CheckNoEventTypePadding();
1025 perf_trace_event_type type;
1026 memset(&type, 0, sizeof(type));
1027 if (!ReadDataFromBuffer(data, sizeof(type.event_id), "event id",
1028 offset, &type.event_id)) {
1029 return false;
1030 }
1031 const char* event_name = reinterpret_cast<const char*>(data.ptr + *offset);
1032 CHECK_GT(snprintf(type.name, sizeof(type.name), "%s", event_name), 0);
1033 *offset += sizeof(type.name);
1034 event_types_.push_back(type);
1035 return true;
1036 }
1037
ReadData(const ConstBufferWithSize & data)1038 bool PerfReader::ReadData(const ConstBufferWithSize& data) {
1039 u64 data_remaining_bytes = header_.data.size;
1040 size_t offset = header_.data.offset;
1041 while (data_remaining_bytes != 0) {
1042 if (data.size < offset) {
1043 LOG(ERROR) << "Not enough data to read a perf event.";
1044 return false;
1045 }
1046
1047 const event_t* event = reinterpret_cast<const event_t*>(data.ptr + offset);
1048 if (!ReadPerfEventBlock(*event))
1049 return false;
1050 data_remaining_bytes -= event->header.size;
1051 offset += event->header.size;
1052 }
1053
1054 DLOG(INFO) << "Number of events stored: "<< events_.size();
1055 return true;
1056 }
1057
ReadMetadata(const ConstBufferWithSize & data)1058 bool PerfReader::ReadMetadata(const ConstBufferWithSize& data) {
1059 size_t offset = header_.data.offset + header_.data.size;
1060
1061 for (u32 type = HEADER_FIRST_FEATURE; type != HEADER_LAST_FEATURE; ++type) {
1062 if ((metadata_mask_ & (1 << type)) == 0)
1063 continue;
1064
1065 if (data.size < offset) {
1066 LOG(ERROR) << "Not enough data to read offset and size of metadata.";
1067 return false;
1068 }
1069
1070 u64 metadata_offset, metadata_size;
1071 if (!ReadDataFromBuffer(data, sizeof(metadata_offset), "metadata offset",
1072 &offset, &metadata_offset) ||
1073 !ReadDataFromBuffer(data, sizeof(metadata_size), "metadata size",
1074 &offset, &metadata_size)) {
1075 return false;
1076 }
1077
1078 if (data.size < metadata_offset + metadata_size) {
1079 LOG(ERROR) << "Not enough data to read metadata.";
1080 return false;
1081 }
1082
1083 switch (type) {
1084 case HEADER_TRACING_DATA:
1085 if (!ReadTracingMetadata(data, metadata_offset, metadata_size)) {
1086 return false;
1087 }
1088 break;
1089 case HEADER_BUILD_ID:
1090 if (!ReadBuildIDMetadata(data, type, metadata_offset, metadata_size))
1091 return false;
1092 break;
1093 case HEADER_HOSTNAME:
1094 case HEADER_OSRELEASE:
1095 case HEADER_VERSION:
1096 case HEADER_ARCH:
1097 case HEADER_CPUDESC:
1098 case HEADER_CPUID:
1099 case HEADER_CMDLINE:
1100 if (!ReadStringMetadata(data, type, metadata_offset, metadata_size))
1101 return false;
1102 break;
1103 case HEADER_NRCPUS:
1104 if (!ReadUint32Metadata(data, type, metadata_offset, metadata_size))
1105 return false;
1106 break;
1107 case HEADER_TOTAL_MEM:
1108 if (!ReadUint64Metadata(data, type, metadata_offset, metadata_size))
1109 return false;
1110 break;
1111 case HEADER_EVENT_DESC:
1112 break;
1113 case HEADER_CPU_TOPOLOGY:
1114 if (!ReadCPUTopologyMetadata(data, type, metadata_offset, metadata_size))
1115 return false;
1116 break;
1117 case HEADER_NUMA_TOPOLOGY:
1118 if (!ReadNUMATopologyMetadata(data, type, metadata_offset, metadata_size))
1119 return false;
1120 break;
1121 case HEADER_PMU_MAPPINGS:
1122 // ignore for now
1123 continue;
1124 break;
1125 case HEADER_BRANCH_STACK:
1126 continue;
1127 default: LOG(INFO) << "Unsupported metadata type: " << type;
1128 break;
1129 }
1130 }
1131
1132 // Event type events are optional in some newer versions of perf. They
1133 // contain the same information that is already in |attrs_|. Make sure the
1134 // number of event types matches the number of attrs, but only if there are
1135 // event type events present.
1136 if (event_types_.size() > 0) {
1137 if (event_types_.size() != attrs_.size()) {
1138 LOG(ERROR) << "Mismatch between number of event type events and attr "
1139 << "events: " << event_types_.size() << " vs "
1140 << attrs_.size();
1141 return false;
1142 }
1143 metadata_mask_ |= (1 << HEADER_EVENT_DESC);
1144 }
1145 return true;
1146 }
1147
ReadBuildIDMetadata(const ConstBufferWithSize & data,u32,size_t offset,size_t size)1148 bool PerfReader::ReadBuildIDMetadata(const ConstBufferWithSize& data, u32 /*type*/,
1149 size_t offset, size_t size) {
1150 CheckNoBuildIDEventPadding();
1151 while (size > 0) {
1152 // Make sure there is enough data for everything but the filename.
1153 if (data.size < offset + sizeof(build_id_event) / sizeof(*data.ptr)) {
1154 LOG(ERROR) << "Not enough bytes to read build id event";
1155 return false;
1156 }
1157
1158 const build_id_event* temp_ptr =
1159 reinterpret_cast<const build_id_event*>(data.ptr + offset);
1160 u16 event_size = temp_ptr->header.size;
1161 if (is_cross_endian_)
1162 ByteSwap(&event_size);
1163
1164 // Make sure there is enough data for the rest of the event.
1165 if (data.size < offset + event_size / sizeof(*data.ptr)) {
1166 LOG(ERROR) << "Not enough bytes to read build id event";
1167 return false;
1168 }
1169
1170 // Allocate memory for the event and copy over the bytes.
1171 build_id_event* event = CallocMemoryForBuildID(event_size);
1172 if (!ReadDataFromBuffer(data, event_size, "build id event",
1173 &offset, event)) {
1174 return false;
1175 }
1176 if (is_cross_endian_) {
1177 ByteSwap(&event->header.type);
1178 ByteSwap(&event->header.misc);
1179 ByteSwap(&event->header.size);
1180 ByteSwap(&event->pid);
1181 }
1182 size -= event_size;
1183
1184 // Perf tends to use more space than necessary, so fix the size.
1185 event->header.size =
1186 sizeof(*event) + GetUint64AlignedStringLength(event->filename);
1187 build_id_events_.push_back(event);
1188 }
1189
1190 return true;
1191 }
1192
ReadStringMetadata(const ConstBufferWithSize & data,u32 type,size_t offset,size_t size)1193 bool PerfReader::ReadStringMetadata(const ConstBufferWithSize& data, u32 type,
1194 size_t offset, size_t size) {
1195 PerfStringMetadata str_data;
1196 str_data.type = type;
1197
1198 size_t start_offset = offset;
1199 // Skip the number of string data if it is present.
1200 if (NeedsNumberOfStringData(type))
1201 offset += sizeof(num_string_data_type) / sizeof(*data.ptr);
1202
1203 while ((offset - start_offset) < size) {
1204 CStringWithLength single_string;
1205 if (!ReadStringFromBuffer(data, is_cross_endian_, &offset, &single_string))
1206 return false;
1207 str_data.data.push_back(single_string);
1208 }
1209
1210 string_metadata_.push_back(str_data);
1211 return true;
1212 }
1213
ReadUint32Metadata(const ConstBufferWithSize & data,u32 type,size_t offset,size_t size)1214 bool PerfReader::ReadUint32Metadata(const ConstBufferWithSize& data, u32 type,
1215 size_t offset, size_t size) {
1216 PerfUint32Metadata uint32_data;
1217 uint32_data.type = type;
1218
1219 size_t start_offset = offset;
1220 while (size > offset - start_offset) {
1221 uint32_t item;
1222 if (!ReadDataFromBuffer(data, sizeof(item), "uint32_t data", &offset,
1223 &item))
1224 return false;
1225
1226 if (is_cross_endian_)
1227 ByteSwap(&item);
1228
1229 uint32_data.data.push_back(item);
1230 }
1231
1232 uint32_metadata_.push_back(uint32_data);
1233 return true;
1234 }
1235
ReadUint64Metadata(const ConstBufferWithSize & data,u32 type,size_t offset,size_t size)1236 bool PerfReader::ReadUint64Metadata(const ConstBufferWithSize& data, u32 type,
1237 size_t offset, size_t size) {
1238 PerfUint64Metadata uint64_data;
1239 uint64_data.type = type;
1240
1241 size_t start_offset = offset;
1242 while (size > offset - start_offset) {
1243 uint64_t item;
1244 if (!ReadDataFromBuffer(data, sizeof(item), "uint64_t data", &offset,
1245 &item))
1246 return false;
1247
1248 if (is_cross_endian_)
1249 ByteSwap(&item);
1250
1251 uint64_data.data.push_back(item);
1252 }
1253
1254 uint64_metadata_.push_back(uint64_data);
1255 return true;
1256 }
1257
ReadCPUTopologyMetadata(const ConstBufferWithSize & data,u32,size_t offset,size_t)1258 bool PerfReader::ReadCPUTopologyMetadata(
1259 const ConstBufferWithSize& data, u32 /*type*/, size_t offset, size_t /*size*/) {
1260 num_siblings_type num_core_siblings;
1261 if (!ReadDataFromBuffer(data, sizeof(num_core_siblings), "num cores",
1262 &offset, &num_core_siblings)) {
1263 return false;
1264 }
1265 if (is_cross_endian_)
1266 ByteSwap(&num_core_siblings);
1267
1268 cpu_topology_.core_siblings.resize(num_core_siblings);
1269 for (size_t i = 0; i < num_core_siblings; ++i) {
1270 if (!ReadStringFromBuffer(data, is_cross_endian_, &offset,
1271 &cpu_topology_.core_siblings[i])) {
1272 return false;
1273 }
1274 }
1275
1276 num_siblings_type num_thread_siblings;
1277 if (!ReadDataFromBuffer(data, sizeof(num_thread_siblings), "num threads",
1278 &offset, &num_thread_siblings)) {
1279 return false;
1280 }
1281 if (is_cross_endian_)
1282 ByteSwap(&num_thread_siblings);
1283
1284 cpu_topology_.thread_siblings.resize(num_thread_siblings);
1285 for (size_t i = 0; i < num_thread_siblings; ++i) {
1286 if (!ReadStringFromBuffer(data, is_cross_endian_, &offset,
1287 &cpu_topology_.thread_siblings[i])) {
1288 return false;
1289 }
1290 }
1291
1292 return true;
1293 }
1294
ReadNUMATopologyMetadata(const ConstBufferWithSize & data,u32,size_t offset,size_t)1295 bool PerfReader::ReadNUMATopologyMetadata(
1296 const ConstBufferWithSize& data, u32 /*type*/, size_t offset, size_t /*size*/) {
1297 numa_topology_num_nodes_type num_nodes;
1298 if (!ReadDataFromBuffer(data, sizeof(num_nodes), "num nodes",
1299 &offset, &num_nodes)) {
1300 return false;
1301 }
1302 if (is_cross_endian_)
1303 ByteSwap(&num_nodes);
1304
1305 for (size_t i = 0; i < num_nodes; ++i) {
1306 PerfNodeTopologyMetadata node;
1307 if (!ReadDataFromBuffer(data, sizeof(node.id), "node id",
1308 &offset, &node.id) ||
1309 !ReadDataFromBuffer(data, sizeof(node.total_memory),
1310 "node total memory", &offset,
1311 &node.total_memory) ||
1312 !ReadDataFromBuffer(data, sizeof(node.free_memory),
1313 "node free memory", &offset, &node.free_memory) ||
1314 !ReadStringFromBuffer(data, is_cross_endian_, &offset,
1315 &node.cpu_list)) {
1316 return false;
1317 }
1318 if (is_cross_endian_) {
1319 ByteSwap(&node.id);
1320 ByteSwap(&node.total_memory);
1321 ByteSwap(&node.free_memory);
1322 }
1323 numa_topology_.push_back(node);
1324 }
1325 return true;
1326 }
1327
ReadTracingMetadata(const ConstBufferWithSize & data,size_t offset,size_t size)1328 bool PerfReader::ReadTracingMetadata(
1329 const ConstBufferWithSize& data, size_t offset, size_t size) {
1330 size_t tracing_data_offset = offset;
1331 tracing_data_.resize(size);
1332 return ReadDataFromBuffer(data, tracing_data_.size(), "tracing_data",
1333 &tracing_data_offset, tracing_data_.data());
1334 }
1335
ReadTracingMetadataEvent(const ConstBufferWithSize & data,size_t offset)1336 bool PerfReader::ReadTracingMetadataEvent(
1337 const ConstBufferWithSize& data, size_t offset) {
1338 // TRACING_DATA's header.size is a lie. It is the size of only the event
1339 // struct. The size of the data is in the event struct, and followed
1340 // immediately by the tracing header data.
1341
1342 // Make a copy of the event (but not the tracing data)
1343 tracing_data_event tracing_event =
1344 *reinterpret_cast<const tracing_data_event*>(data.ptr + offset);
1345
1346 if (is_cross_endian_) {
1347 ByteSwap(&tracing_event.header.type);
1348 ByteSwap(&tracing_event.header.misc);
1349 ByteSwap(&tracing_event.header.size);
1350 ByteSwap(&tracing_event.size);
1351 }
1352
1353 return ReadTracingMetadata(data, offset + tracing_event.header.size,
1354 tracing_event.size);
1355 }
1356
ReadAttrEventBlock(const ConstBufferWithSize & data,size_t offset,size_t size)1357 bool PerfReader::ReadAttrEventBlock(const ConstBufferWithSize& data,
1358 size_t offset, size_t size) {
1359 const size_t initial_offset = offset;
1360 PerfFileAttr attr;
1361 if (!ReadEventAttr(data, &offset, &attr.attr))
1362 return false;
1363
1364 // attr.attr.size has been upgraded to the current size of perf_event_attr.
1365 const size_t actual_attr_size = offset - initial_offset;
1366
1367 const size_t num_ids =
1368 (size - actual_attr_size) / sizeof(decltype(attr.ids)::value_type);
1369 if (!ReadUniqueIDs(data, num_ids, &offset, &attr.ids))
1370 return false;
1371
1372 // Event types are found many times in the perf data file.
1373 // Only add this event type if it is not already present.
1374 for (size_t i = 0; i < attrs_.size(); ++i) {
1375 if (attrs_[i].ids[0] == attr.ids[0])
1376 return true;
1377 }
1378 attrs_.push_back(attr);
1379 return true;
1380 }
1381
1382 // When this method is called, |event| is a reference to the bytes in the data
1383 // vector that contains the entire perf.data file. As a result, we need to be
1384 // careful to only copy event.header.size bytes.
1385 // In particular, something like
1386 // event_t event_copy = event;
1387 // would be bad, because it would read past the end of the event, and possibly
1388 // pass the end of the data vector as well.
ReadPerfEventBlock(const event_t & event)1389 bool PerfReader::ReadPerfEventBlock(const event_t& event) {
1390 u16 size = event.header.size;
1391 if (is_cross_endian_)
1392 ByteSwap(&size);
1393
1394 if (size > sizeof(event_t)) {
1395 LOG(INFO) << "Data size: " << size << " sizeof(event_t): "
1396 << sizeof(event_t);
1397 return false;
1398 }
1399
1400 // Copy only the part of the event that is needed.
1401 malloced_unique_ptr<event_t> event_copy(CallocMemoryForEvent(size));
1402 memcpy(event_copy.get(), &event, size);
1403 if (is_cross_endian_) {
1404 ByteSwap(&event_copy->header.type);
1405 ByteSwap(&event_copy->header.misc);
1406 ByteSwap(&event_copy->header.size);
1407 }
1408
1409 uint32_t type = event_copy->header.type;
1410 if (is_cross_endian_) {
1411 switch (type) {
1412 case PERF_RECORD_SAMPLE:
1413 break;
1414 case PERF_RECORD_MMAP:
1415 ByteSwap(&event_copy->mmap.pid);
1416 ByteSwap(&event_copy->mmap.tid);
1417 ByteSwap(&event_copy->mmap.start);
1418 ByteSwap(&event_copy->mmap.len);
1419 ByteSwap(&event_copy->mmap.pgoff);
1420 break;
1421 case PERF_RECORD_MMAP2:
1422 ByteSwap(&event_copy->mmap2.pid);
1423 ByteSwap(&event_copy->mmap2.tid);
1424 ByteSwap(&event_copy->mmap2.start);
1425 ByteSwap(&event_copy->mmap2.len);
1426 ByteSwap(&event_copy->mmap2.pgoff);
1427 ByteSwap(&event_copy->mmap2.maj);
1428 ByteSwap(&event_copy->mmap2.min);
1429 ByteSwap(&event_copy->mmap2.ino);
1430 ByteSwap(&event_copy->mmap2.ino_generation);
1431 break;
1432 case PERF_RECORD_FORK:
1433 case PERF_RECORD_EXIT:
1434 ByteSwap(&event_copy->fork.pid);
1435 ByteSwap(&event_copy->fork.tid);
1436 ByteSwap(&event_copy->fork.ppid);
1437 ByteSwap(&event_copy->fork.ptid);
1438 break;
1439 case PERF_RECORD_COMM:
1440 ByteSwap(&event_copy->comm.pid);
1441 ByteSwap(&event_copy->comm.tid);
1442 break;
1443 case PERF_RECORD_LOST:
1444 ByteSwap(&event_copy->lost.id);
1445 ByteSwap(&event_copy->lost.lost);
1446 break;
1447 case PERF_RECORD_READ:
1448 ByteSwap(&event_copy->read.pid);
1449 ByteSwap(&event_copy->read.tid);
1450 ByteSwap(&event_copy->read.value);
1451 ByteSwap(&event_copy->read.time_enabled);
1452 ByteSwap(&event_copy->read.time_running);
1453 ByteSwap(&event_copy->read.id);
1454 break;
1455 default:
1456 LOG(FATAL) << "Unknown event type: " << type;
1457 }
1458 }
1459
1460 events_.push_back(std::move(event_copy));
1461
1462 return true;
1463 }
1464
GetNumMetadata() const1465 size_t PerfReader::GetNumMetadata() const {
1466 // This is just the number of 1s in the binary representation of the metadata
1467 // mask. However, make sure to only use supported metadata, and don't include
1468 // branch stack (since it doesn't have an entry in the metadata section).
1469 uint64_t new_mask = metadata_mask_;
1470 new_mask &= kSupportedMetadataMask & ~(1 << HEADER_BRANCH_STACK);
1471 std::bitset<sizeof(new_mask) * CHAR_BIT> bits(new_mask);
1472 return bits.count();
1473 }
1474
GetEventDescMetadataSize() const1475 size_t PerfReader::GetEventDescMetadataSize() const {
1476 size_t size = 0;
1477 if (event_types_.empty()) {
1478 return size;
1479 }
1480 if (metadata_mask_ & (1 << HEADER_EVENT_DESC)) {
1481 if (event_types_.size() > 0 && event_types_.size() != attrs_.size()) {
1482 LOG(ERROR) << "Mismatch between number of event type events and attr "
1483 << "events: " << event_types_.size() << " vs "
1484 << attrs_.size();
1485 return size;
1486 }
1487 size += sizeof(event_desc_num_events) + sizeof(event_desc_attr_size);
1488 CStringWithLength dummy;
1489 for (size_t i = 0; i < attrs_.size(); ++i) {
1490 size += sizeof(perf_event_attr) + sizeof(dummy.len);
1491 size += sizeof(event_desc_num_unique_ids);
1492 size += GetUint64AlignedStringLength(event_types_[i].name) * sizeof(char);
1493 size += attrs_[i].ids.size() * sizeof(attrs_[i].ids[0]);
1494 }
1495 }
1496 return size;
1497 }
1498
GetBuildIDMetadataSize() const1499 size_t PerfReader::GetBuildIDMetadataSize() const {
1500 size_t size = 0;
1501 for (size_t i = 0; i < build_id_events_.size(); ++i)
1502 size += build_id_events_[i]->header.size;
1503 return size;
1504 }
1505
GetStringMetadataSize() const1506 size_t PerfReader::GetStringMetadataSize() const {
1507 size_t size = 0;
1508 for (size_t i = 0; i < string_metadata_.size(); ++i) {
1509 const PerfStringMetadata& metadata = string_metadata_[i];
1510 if (NeedsNumberOfStringData(metadata.type))
1511 size += sizeof(num_string_data_type);
1512
1513 for (size_t j = 0; j < metadata.data.size(); ++j) {
1514 const CStringWithLength& str = metadata.data[j];
1515 size += sizeof(str.len) + (str.len * sizeof(char));
1516 }
1517 }
1518 return size;
1519 }
1520
GetUint32MetadataSize() const1521 size_t PerfReader::GetUint32MetadataSize() const {
1522 size_t size = 0;
1523 for (size_t i = 0; i < uint32_metadata_.size(); ++i) {
1524 const PerfUint32Metadata& metadata = uint32_metadata_[i];
1525 size += metadata.data.size() * sizeof(metadata.data[0]);
1526 }
1527 return size;
1528 }
1529
GetUint64MetadataSize() const1530 size_t PerfReader::GetUint64MetadataSize() const {
1531 size_t size = 0;
1532 for (size_t i = 0; i < uint64_metadata_.size(); ++i) {
1533 const PerfUint64Metadata& metadata = uint64_metadata_[i];
1534 size += metadata.data.size() * sizeof(metadata.data[0]);
1535 }
1536 return size;
1537 }
1538
GetCPUTopologyMetadataSize() const1539 size_t PerfReader::GetCPUTopologyMetadataSize() const {
1540 // Core siblings.
1541 size_t size = sizeof(num_siblings_type);
1542 for (size_t i = 0; i < cpu_topology_.core_siblings.size(); ++i) {
1543 const CStringWithLength& str = cpu_topology_.core_siblings[i];
1544 size += sizeof(str.len) + (str.len * sizeof(char));
1545 }
1546
1547 // Thread siblings.
1548 size += sizeof(num_siblings_type);
1549 for (size_t i = 0; i < cpu_topology_.thread_siblings.size(); ++i) {
1550 const CStringWithLength& str = cpu_topology_.thread_siblings[i];
1551 size += sizeof(str.len) + (str.len * sizeof(char));
1552 }
1553
1554 return size;
1555 }
1556
GetNUMATopologyMetadataSize() const1557 size_t PerfReader::GetNUMATopologyMetadataSize() const {
1558 size_t size = sizeof(numa_topology_num_nodes_type);
1559 for (size_t i = 0; i < numa_topology_.size(); ++i) {
1560 const PerfNodeTopologyMetadata& node = numa_topology_[i];
1561 size += sizeof(node.id);
1562 size += sizeof(node.total_memory) + sizeof(node.free_memory);
1563 size += sizeof(node.cpu_list.len) + node.cpu_list.len * sizeof(char);
1564 }
1565 return size;
1566 }
1567
NeedsNumberOfStringData(u32 type) const1568 bool PerfReader::NeedsNumberOfStringData(u32 type) const {
1569 return type == HEADER_CMDLINE;
1570 }
1571
LocalizeMMapFilenames(const std::map<string,string> & filename_map)1572 bool PerfReader::LocalizeMMapFilenames(
1573 const std::map<string, string>& filename_map) {
1574 // Search for mmap/mmap2 events for which the filename needs to be updated.
1575 for (size_t i = 0; i < events_.size(); ++i) {
1576 string filename;
1577 size_t size_of_fixed_event_parts;
1578 event_t* event = events_[i].get();
1579 if (event->header.type == PERF_RECORD_MMAP) {
1580 filename = string(event->mmap.filename);
1581 size_of_fixed_event_parts =
1582 sizeof(event->mmap) - sizeof(event->mmap.filename);
1583 } else if (event->header.type == PERF_RECORD_MMAP2) {
1584 filename = string(event->mmap2.filename);
1585 size_of_fixed_event_parts =
1586 sizeof(event->mmap2) - sizeof(event->mmap2.filename);
1587 } else {
1588 continue;
1589 }
1590
1591 const auto it = filename_map.find(filename);
1592 if (it == filename_map.end()) // not found
1593 continue;
1594
1595 const string& new_filename = it->second;
1596 size_t old_len = GetUint64AlignedStringLength(filename);
1597 size_t new_len = GetUint64AlignedStringLength(new_filename);
1598 size_t old_offset = GetPerfSampleDataOffset(*event);
1599 size_t sample_size = event->header.size - old_offset;
1600
1601 int size_change = new_len - old_len;
1602 size_t new_size = event->header.size + size_change;
1603 size_t new_offset = old_offset + size_change;
1604
1605 if (size_change > 0) {
1606 // Allocate memory for a new event.
1607 event_t* old_event = event;
1608 malloced_unique_ptr<event_t> new_event(CallocMemoryForEvent(new_size));
1609
1610 // Copy over everything except filename and sample info.
1611 memcpy(new_event.get(), old_event, size_of_fixed_event_parts);
1612
1613 // Copy over the sample info to the correct location.
1614 char* old_addr = reinterpret_cast<char*>(old_event);
1615 char* new_addr = reinterpret_cast<char*>(new_event.get());
1616 memcpy(new_addr + new_offset, old_addr + old_offset, sample_size);
1617
1618 events_[i] = std::move(new_event);
1619 event = events_[i].get();
1620 } else if (size_change < 0) {
1621 // Move the perf sample data to its new location.
1622 // Since source and dest could overlap, use memmove instead of memcpy.
1623 char* start_addr = reinterpret_cast<char*>(event);
1624 memmove(start_addr + new_offset, start_addr + old_offset, sample_size);
1625 }
1626
1627 // Copy over the new filename and fix the size of the event.
1628 char *event_filename = nullptr;
1629 if (event->header.type == PERF_RECORD_MMAP) {
1630 event_filename = event->mmap.filename;
1631 } else if (event->header.type == PERF_RECORD_MMAP2) {
1632 event_filename = event->mmap2.filename;
1633 } else {
1634 LOG(FATAL) << "Unexpected event type"; // Impossible
1635 }
1636 CHECK_GT(snprintf(event_filename, new_filename.size() + 1, "%s",
1637 new_filename.c_str()),
1638 0);
1639 event->header.size = new_size;
1640 }
1641
1642 return true;
1643 }
1644
1645 } // namespace quipper
1646