1# -*- coding: utf-8 -*-
2# Copyright 2018 The Chromium OS Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6"""Python module to draw heat map for Chrome
7
A heat map is a histogram used to analyze the locality of function layout.
9
10This module is used by heat_map.py. HeatmapGenerator is a class to
11generate data for drawing heat maps (the actual drawing of heat maps is
12performed by another script perf-to-inst-page.sh). It can also analyze
13the symbol names in hot pages.
14"""
15
16from __future__ import division, print_function
17
18import bisect
19import collections
20import os
21import pipes
22import subprocess
23
24from cros_utils import command_executer
25
# Half-open [start, end) range of text offsets that is backed by hugepages.
HugepageRange = collections.namedtuple('HugepageRange', 'start end')
27
28
class MMap(object):
  """Class to store mmap information in perf report.

  We assume ASLR is disabled, so MMap for all Chrome is assumed to be
  the same. This class deals with the case hugepage creates several
  mmaps for Chrome but should be merged together. In these case, we
  assume the first MMAP is not affected by the bug and use the MMAP.

  Attributes:
    start_address: starting address of the mapping.
    size: length of the mapping.
    offset: page offset of the mapping.
  """

  def __init__(self, addr, size, offset):
    self.start_address = addr
    self.size = size
    self.offset = offset

  def __str__(self):
    return '(%x, %x, %x)' % (self.start_address, self.size, self.offset)

  def merge(self, mmap):
    """Checks that a later mmap of the same process fits inside this one.

    This function should not be needed, since we should only have one MMAP
    on Chrome of each process. It only deals with images that are affected
    by http://crbug.com/931465. Nothing is actually merged: the first MMAP
    is kept as-is and the later one is only sanity-checked against it.

    Args:
      mmap: the later MMap object reported for the same process.

    Raises:
      AssertionError: if the later mmap is not contained in this one.
    """
    if self.start_address == mmap.start_address:
      assert self.size >= mmap.size, (
          'Original MMAP size(%x) is smaller than the forked process(%x).' %
          (self.size, mmap.size))
      # Either the MMAP is forked from the previous process (nothing to do),
      # or hugepage caused a small Chrome mmap. In both cases the prior MMAP
      # is used for the whole Chrome.
      return

    # The two literals need the trailing space, otherwise the message reads
    # "forkedprocess".
    assert self.start_address < mmap.start_address, (
        'Original MMAP starting address(%x) is larger than the forked '
        'process(%x).' % (self.start_address, mmap.start_address))

    assert self.start_address + self.size >= mmap.start_address + mmap.size, (
        'MMAP of the forked process exceeds the end of original MMAP.')
70
71
72class HeatmapGenerator(object):
  """Class to generate a heat map from a perf report with mmaps and samples.

  This class exposes two interfaces to other modules: draw() and analyze().

  draw() draws a heatmap with the sample information given in the perf report.
  analyze() prints out the symbol names in the hottest pages with the given
  chrome binary.
  """
82
83  def __init__(self,
84               perf_report,
85               page_size,
86               hugepage,
87               title,
88               log_level='verbose'):
89    self.perf_report = perf_report
90    # Pick 1G as a relatively large number. All addresses less than it will
91    # be recorded. The actual heatmap will show up to a boundary of the
92    # largest address in text segment.
93    self.max_addr = 1024 * 1024 * 1024
94    self.ce = command_executer.GetCommandExecuter(log_level=log_level)
95    self.dir = os.path.dirname(os.path.realpath(__file__))
96    with open(perf_report, 'r', encoding='utf-8') as f:
97      self.perf_report_contents = f.readlines()
98    # Write histogram results to a text file, in order to use gnu plot to draw
99    self.hist_temp_output = open('out.txt', 'w', encoding='utf-8')
100    self.processes = {}
101    self.deleted_processes = {}
102    self.count = 0
103    if hugepage:
104      self.hugepage = HugepageRange(start=hugepage[0], end=hugepage[1])
105    else:
106      self.hugepage = None
107    self.title = title
108    self.symbol_addresses = []
109    self.symbol_names = []
110    self.page_size = page_size
111
112  def _parse_perf_sample(self, line):
113    # In a perf report, generated with -D, a PERF_RECORD_SAMPLE command should
114    # look like this: TODO: some arguments are unknown
115    #
116    # cpuid cycle unknown [unknown]: PERF_RECORD_SAMPLE(IP, 0x2): pid/tid:
117    # 0xaddr period: period addr: addr
118    # ... thread: threadname:tid
119    # ...... dso: process
120    #
121    # This is an example:
122    # 1 136712833349 0x6a558 [0x30]: PERF_RECORD_SAMPLE(IP, 0x2): 5227/5227:
123    # 0x55555683b810 period: 372151 addr: 0
124    # ... thread: chrome:5227
125    # ...... dso: /opt/google/chrome/chrome
126    #
127    # For this function, the 7th argument (args[6]) after spltting with spaces
128    # is pid/tid. We use the combination of the two as the pid.
129    # Also, we add an assertion here to check the tid in the 7th argument(
130    # args[6]) and the 15th argument(arg[14]) are the same
131    #
132    # The function returns the ((pid,tid), address) pair if the sampling
133    # is on Chrome. Otherwise, return (None, None) pair.
134
135    if 'thread: chrome' not in line or \
136    'dso: /opt/google/chrome/chrome' not in line:
137      return None, None
138    args = line.split(' ')
139    pid_raw = args[6].split('/')
140    assert pid_raw[1][:-1] == args[14].split(':')[1][:-1], \
141    'TID in %s of sample is not the same: %s/%s' % (
142        line[:-1], pid_raw[1][:-1], args[14].split(':')[1][:-1])
143    key = (int(pid_raw[0]), int(pid_raw[1][:-1]))
144    address = int(args[7], base=16)
145    return key, address
146
147  def _parse_perf_record(self, line):
148    # In a perf report, generated with -D, a PERF_RECORD_MMAP2 command should
149    # look like this: TODO: some arguments are unknown
150    #
151    # cpuid cycle unknown [unknown]: PERF_RECORD_MMAP2 pid/tid:
152    # [0xaddr(0xlength) @ pageoffset maj:min ino ino_generation]:
153    # permission process
154    #
155    # This is an example.
156    # 2 136690556823 0xa6898 [0x80]: PERF_RECORD_MMAP2 5227/5227:
157    # [0x555556496000(0x8d1b000) @ 0xf42000 b3:03 92844 1892514370]:
158    # r-xp /opt/google/chrome/chrome
159    #
160    # For this function, the 6th argument (args[5]) after spltting with spaces
161    # is pid/tid. We use the combination of the two as the pid.
162    # The 7th argument (args[6]) is the [0xaddr(0xlength). We can peel the
163    # string to get the address and size of the mmap.
164    # The 9th argument (args[8]) is the page offset.
165    # The function returns the ((pid,tid), mmap) pair if the mmap is for Chrome
166    # is on Chrome. Otherwise, return (None, None) pair.
167
168    if 'chrome/chrome' not in line:
169      return None, None
170    args = line.split(' ')
171    pid_raw = args[5].split('/')
172    assert pid_raw[0] == pid_raw[1][:-1], \
173    'PID in %s of mmap is not the same: %s/%s' % (
174        line[:-1], pid_raw[0], pid_raw[1])
175    pid = (int(pid_raw[0]), int(pid_raw[1][:-1]))
176    address_raw = args[6].split('(')
177    start_address = int(address_raw[0][1:], base=16)
178    size = int(address_raw[1][:-1], base=16)
179    offset = int(args[8], base=16)
180    # Return an mmap object instead of only starting address,
181    # in case there are many mmaps for the sample PID
182    return pid, MMap(start_address, size, offset)
183
184  def _parse_pair_event(self, arg):
185    # This function is called by the _parse_* functions that has a pattern of
186    # pids like: (pid:tid):(pid:tid), i.e.
187    # PERF_RECORD_FORK and PERF_RECORD_COMM
188    _, remain = arg.split('(', 1)
189    pid1, remain = remain.split(':', 1)
190    pid2, remain = remain.split(')', 1)
191    _, remain = remain.split('(', 1)
192    pid3, remain = remain.split(':', 1)
193    pid4, remain = remain.split(')', 1)
194    return (int(pid1), int(pid2)), (int(pid3), int(pid4))
195
196  def _process_perf_record(self, line):
197    # This function calls _parse_perf_record() to get information from
198    # PERF_RECORD_MMAP2. It records the mmap object for each pid (a pair of
199    # pid,tid), into a dictionary.
200    pid, mmap = self._parse_perf_record(line)
201    if pid is None:
202      # PID = None meaning the mmap is not for chrome
203      return
204    if pid in self.processes:
205      # This should never happen for a correct profiling result, as we
206      # should only have one MMAP for Chrome for each process.
207      # If it happens, see http://crbug.com/931465
208      self.processes[pid].merge(mmap)
209    else:
210      self.processes[pid] = mmap
211
212  def _process_perf_fork(self, line):
213    # In a perf report, generated with -D, a PERF_RECORD_FORK command should
214    # look like this:
215    #
216    # cpuid cycle unknown [unknown]:
217    # PERF_RECORD_FORK(pid_to:tid_to):(pid_from:tid_from)
218    #
219    # This is an example.
220    # 0 0 0x22a8 [0x38]: PERF_RECORD_FORK(1:1):(0:0)
221    #
222    # In this function, we need to peel the information of pid:tid pairs
223    # So we get the last argument and send it to function _parse_pair_event()
224    # for analysis.
225    # We use (pid, tid) as the pid.
226    args = line.split(' ')
227    pid_to, pid_from = self._parse_pair_event(args[-1])
228    if pid_from in self.processes:
229      assert pid_to not in self.processes
230      self.processes[pid_to] = MMap(self.processes[pid_from].start_address,
231                                    self.processes[pid_from].size,
232                                    self.processes[pid_from].offset)
233
234  def _process_perf_exit(self, line):
235    # In a perf report, generated with -D, a PERF_RECORD_EXIT command should
236    # look like this:
237    #
238    # cpuid cycle unknown [unknown]:
239    # PERF_RECORD_EXIT(pid1:tid1):(pid2:tid2)
240    #
241    # This is an example.
242    # 1 136082505621 0x30810 [0x38]: PERF_RECORD_EXIT(3851:3851):(3851:3851)
243    #
244    # In this function, we need to peel the information of pid:tid pairs
245    # So we get the last argument and send it to function _parse_pair_event()
246    # for analysis.
247    # We use (pid, tid) as the pid.
248    args = line.split(' ')
249    pid_to, pid_from = self._parse_pair_event(args[-1])
250    assert pid_to == pid_from, '(%d, %d) (%d, %d)' % (pid_to[0], pid_to[1],
251                                                      pid_from[0], pid_from[1])
252    if pid_to in self.processes:
253      # Don't delete the process yet
254      self.deleted_processes[pid_from] = self.processes[pid_from]
255
256  def _process_perf_sample(self, line):
257    # This function calls _parse_perf_sample() to get information from
258    # the perf report.
259    # It needs to check the starting address of allocated mmap from
260    # the dictionary (self.processes) to calculate the offset within
261    # the text section of the sampling.
262    # The offset is calculated into pages (4KB or 2MB) and writes into
263    # out.txt together with the total counts, which will be used to
264    # calculate histogram.
265    pid, addr = self._parse_perf_sample(line)
266    if pid is None:
267      return
268
269    assert pid in self.processes and pid not in self.deleted_processes, \
270    'PID %d not found mmap and not forked from another process'
271
272    start_address = self.processes[pid].start_address
273    address = addr - start_address
274    assert address >= 0 and \
275    'addresses accessed in PERF_RECORD_SAMPLE should be larger than' \
276    ' the starting address of Chrome'
277    if address < self.max_addr:
278      self.count += 1
279      line = '%d/%d: %d %d' % (pid[0], pid[1], self.count,
280                               address // self.page_size * self.page_size)
281      if self.hugepage:
282        if self.hugepage.start <= address < self.hugepage.end:
283          line += ' hugepage'
284        else:
285          line += ' smallpage'
286      print(line, file=self.hist_temp_output)
287
288  def _read_perf_report(self):
289    # Serve as main function to read perf report, generated by -D
290    lines = iter(self.perf_report_contents)
291    for line in lines:
292      if 'PERF_RECORD_MMAP' in line:
293        self._process_perf_record(line)
294      elif 'PERF_RECORD_FORK' in line:
295        self._process_perf_fork(line)
296      elif 'PERF_RECORD_EXIT' in line:
297        self._process_perf_exit(line)
298      elif 'PERF_RECORD_SAMPLE' in line:
299        # Perf sample is multi-line
300        self._process_perf_sample(line + next(lines) + next(lines))
301    self.hist_temp_output.close()
302
303  def _draw_heat_map(self):
304    # Calls a script (perf-to-inst-page.sh) to calculate histogram
305    # of results written in out.txt and also generate pngs for
306    # heat maps.
307    heatmap_script = os.path.join(self.dir, 'perf-to-inst-page.sh')
308    if self.hugepage:
309      hp_arg = 'hugepage'
310    else:
311      hp_arg = 'none'
312
313    cmd = '{0} {1} {2}'.format(heatmap_script, pipes.quote(self.title), hp_arg)
314    retval = self.ce.RunCommand(cmd)
315    if retval:
316      raise RuntimeError('Failed to run script to generate heatmap')
317
318  def _restore_histogram(self):
319    # When hugepage is used, there are two files inst-histo-{hp,sp}.txt
320    # So we need to read in all the files.
321    names = [x for x in os.listdir('.') if 'inst-histo' in x and '.txt' in x]
322    hist = {}
323    for n in names:
324      with open(n, encoding='utf-8') as f:
325        for l in f.readlines():
326          num, addr = l.strip().split(' ')
327          assert int(addr) not in hist
328          hist[int(addr)] = int(num)
329    return hist
330
331  def _read_symbols_from_binary(self, binary):
332    # FIXME: We are using nm to read symbol names from Chrome binary
333    # for now. Can we get perf to hand us symbol names, instead of
334    # using nm in the future?
335    #
336    # Get all the symbols (and their starting addresses) that fall into
337    # the page. Will be used to print out information of hot pages
338    # Each line shows the information of a symbol:
339    # [symbol value (0xaddr)] [symbol type] [symbol name]
340    # For some symbols, the [symbol name] field might be missing.
341    # e.g.
342    # 0000000001129da0 t Builtins_LdaNamedPropertyHandler
343
344    # Generate a list of symbols from nm tool and check each line
345    # to extract symbols names
346    text_section_start = 0
347    for l in subprocess.check_output(['nm', '-n', binary]).split('\n'):
348      args = l.strip().split(' ')
349      if len(args) < 3:
350        # No name field
351        continue
352      addr_raw, symbol_type, name = args
353      addr = int(addr_raw, base=16)
354      if 't' not in symbol_type and 'T' not in symbol_type:
355        # Filter out symbols not in text sections
356        continue
357      if not self.symbol_addresses:
358        # The first symbol in text sections
359        text_section_start = addr
360        self.symbol_addresses.append(0)
361        self.symbol_names.append(name)
362      else:
363        assert text_section_start != 0, \
364        'The starting address of text section has not been found'
365        if addr == self.symbol_addresses[-1]:
366          # if the same address has multiple symbols, put them together
367          # and separate symbol names with '/'
368          self.symbol_names[-1] += '/' + name
369        else:
370          # The output of nm -n command is already sorted by address
371          # Insert to the end will result in a sorted array for bisect
372          self.symbol_addresses.append(addr - text_section_start)
373          self.symbol_names.append(name)
374
375  def _map_addr_to_symbol(self, addr):
376    # Find out the symbol name
377    assert self.symbol_addresses
378    index = bisect.bisect(self.symbol_addresses, addr)
379    assert 0 < index <= len(self.symbol_names), \
380    'Failed to find an index (%d) in the list (len=%d)' % (
381        index, len(self.symbol_names))
382    return self.symbol_names[index - 1]
383
384  def _print_symbols_in_hot_pages(self, fp, pages_to_show):
385    # Print symbols in all the pages of interest
386    for page_num, sample_num in pages_to_show:
387      print(
388          '----------------------------------------------------------', file=fp)
389      print(
390          'Page Offset: %d MB, Count: %d' % (page_num // 1024 // 1024,
391                                             sample_num),
392          file=fp)
393
394      symbol_counts = collections.Counter()
395      # Read Sample File and find out the occurance of symbols in the page
396      lines = iter(self.perf_report_contents)
397      for line in lines:
398        if 'PERF_RECORD_SAMPLE' in line:
399          pid, addr = self._parse_perf_sample(line + next(lines) + next(lines))
400          if pid is None:
401            # The sampling is not on Chrome
402            continue
403          if addr // self.page_size != (
404              self.processes[pid].start_address + page_num) // self.page_size:
405            # Sampling not in the current page
406            continue
407
408          name = self._map_addr_to_symbol(addr -
409                                          self.processes[pid].start_address)
410          assert name, 'Failed to find symbol name of addr %x' % addr
411          symbol_counts[name] += 1
412
413      assert sum(symbol_counts.values()) == sample_num, \
414      'Symbol name matching missing for some addresses: %d vs %d' % (
415          sum(symbol_counts.values()), sample_num)
416
417      # Print out the symbol names sorted by the number of samples in
418      # the page
419      for name, count in sorted(
420          symbol_counts.items(), key=lambda kv: kv[1], reverse=True):
421        if count == 0:
422          break
423        print('> %s : %d' % (name, count), file=fp)
424      print('\n\n', file=fp)
425
426  def draw(self):
427    # First read perf report to process information and save histogram
428    # into a text file
429    self._read_perf_report()
430    # Then use gnu plot to draw heat map
431    self._draw_heat_map()
432
433  def analyze(self, binary, top_n):
434    # Read histogram from histo.txt
435    hist = self._restore_histogram()
436    # Sort the pages in histogram
437    sorted_hist = sorted(hist.items(), key=lambda value: value[1], reverse=True)
438
439    # Generate symbolizations
440    self._read_symbols_from_binary(binary)
441
442    # Write hottest pages
443    with open('addr2symbol.txt', 'w', encoding='utf-8') as fp:
444      if self.hugepage:
445        # Print hugepage region first
446        print(
447            'Hugepage top %d hot pages (%d MB - %d MB):' %
448            (top_n, self.hugepage.start // 1024 // 1024,
449             self.hugepage.end // 1024 // 1024),
450            file=fp)
451        pages_to_print = [(k, v)
452                          for k, v in sorted_hist
453                          if self.hugepage.start <= k < self.hugepage.end
454                         ][:top_n]
455        self._print_symbols_in_hot_pages(fp, pages_to_print)
456        print('==========================================', file=fp)
457        print('Top %d hot pages landed outside of hugepage:' % top_n, file=fp)
458        # Then print outside pages
459        pages_to_print = [(k, v)
460                          for k, v in sorted_hist
461                          if k < self.hugepage.start or k >= self.hugepage.end
462                         ][:top_n]
463        self._print_symbols_in_hot_pages(fp, pages_to_print)
464      else:
465        # Print top_n hottest pages.
466        pages_to_print = sorted_hist[:top_n]
467        self._print_symbols_in_hot_pages(fp, pages_to_print)
468