1#!/usr/bin/env python
2#
3# Copyright (C) 2016 The Android Open Source Project
4#
5# Licensed under the Apache License, Version 2.0 (the "License");
6# you may not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9#      http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16#
17"""Analyze block trace"""
18
19import collections
20import os
21import re
22import string
23import sys
24
25# ex) <...>-52    [001] ...1     1.362574: block_bio_queue: 8,16 R 0 + 8 [kworker/u8:1]
26RE_BLOCK = r'.+-([0-9]+).*\s+([0-9]+\.[0-9]+):\s+block_bio_queue:\s+([0-9]+)\,([0-9]+)\s(\S+)\s+([0-9]+)\s+\+\s+([0-9]+)\s+\[([^\]]+)'
27# ex)  <...>-453   [001] d..4     3.181854: sched_blocked_reason: pid=471 iowait=1 caller=__wait_on_buffer+0x24/0x2c
28RE_SCHED_BLOCKED_READSON = r'.+-([0-9]+)\s+\[([0-9]+)\]\s.*\s+([0-9]+\.[0-9]+):\s+sched_blocked_reason:\spid=([0-9]+)\siowait=([01])\scaller=(\S+)'
29# ex) <idle>-0     [000] d..3     3.181864: sched_switch: prev_comm=swapper/0 prev_pid=0 prev_prio=120 prev_state=R ==> next_comm=ueventd next_pid=471 next_prio=120
30RE_SCHED_SWITCH = r'.+-([0-9]+)\s+\[([0-9]+)\]\s.*\s+([0-9]+\.[0-9]+):\s+sched_switch:\sprev_comm=(.+)\sprev_pid=([0-9]+)\sprev_prio=([0-9]+)\sprev_state=(\S+).*next_comm=(.+)\snext_pid=([0-9]+)\snext_prio=([0-9]+)'
31
32# dev_num = major * MULTIPLIER + minor
33DEV_MAJOR_MULTIPLIER = 1000
34
35# dm access is remapped to disk access. So account differently
36DM_MAJOR = 253
37
38MAX_PROCESS_DUMP = 10
39
40class RwEvent:
41  def __init__(self, block_num, start_time, size):
42    self.block_num = block_num
43    self.start_time = start_time
44    self.size = size
45
46def get_string_pos(strings, string_to_find):
47  for i, s in enumerate(strings):
48    if s == string_to_find:
49      return i
50  return -1
51
52class ProcessData:
53  def __init__(self, name):
54    self.name = name
55    self.reads = {} # k : dev_num, v : [] of reads
56    self.per_device_total_reads = {}
57    self.writes = {}
58    self.per_device_total_writes = {}
59    self.total_reads = 0
60    self.total_writes = 0
61    self.total_dm_reads = 0
62    self.total_dm_writes = 0
63
64
65  def add_read_event(self, major, minor, event):
66    devNum = major * DEV_MAJOR_MULTIPLIER + minor;
67    events = self.reads.get(devNum)
68    if not events:
69      events = []
70      self.reads[devNum] = events
71      self.per_device_total_reads[devNum] = 0
72    events.append(event)
73    self.total_reads += event.size
74    self.per_device_total_reads[devNum] += event.size
75
76  def add_write_event(self, major, minor, event):
77    devNum = major * DEV_MAJOR_MULTIPLIER + minor;
78    events = self.writes.get(devNum)
79    if not events:
80      events = []
81      self.writes[devNum] = events
82      self.per_device_total_writes[devNum] = 0
83    events.append(event)
84    self.total_writes += event.size
85    self.per_device_total_writes[devNum] += event.size
86
87  def add_dm_read(self, size):
88    self.total_dm_reads += size
89
90  def add_dm_write(self, size):
91    self.total_dm_writes += size
92
93  def dump(self):
94    print "Process,", self.name
95    print " total reads,", self.total_reads
96    print " total writes,", self.total_writes
97    print " total dm reads,", self.total_dm_reads
98    print " total dm writes,", self.total_dm_writes
99    print " R per device"
100    sorted_r = collections.OrderedDict(sorted(self.per_device_total_reads.items(), \
101      key = lambda item: item[1], reverse = True))
102    for i in range(len(sorted_r)):
103      dev = sorted_r.popitem(last=False)
104      print " ", dev[0],dev[1]
105
106    print " W per device"
107    sorted_w = collections.OrderedDict(sorted(self.per_device_total_writes.items(), \
108      key = lambda item: item[1], reverse = True))
109    for i in range(len(sorted_w)):
110      dev = sorted_w.popitem(last=False)
111      print " ", dev[0],dev[1]
112
113class IoTrace:
114
115  def __init__(self):
116    self.ios = {} #K: process name, v:ProcessData
117    self.total_reads = 0
118    self.total_writes = 0
119    self.total_reads_per_device = {} #K: block num, V: total blocks
120    self.total_writes_per_device = {}
121    self.total_dm_reads = {} #K: devnum, V: blocks
122    self.total_dm_writes = {}
123    self.re_block = re.compile(RE_BLOCK)
124
125  def parse(self, l):
126    match = self.re_block.match(l)
127    if not match:
128      return False
129    try:
130      self.do_parse_bio_queue(l, match)
131    except ValueError:
132      print "cannot parse:", l
133      raise
134    return True
135
136  def do_parse_bio_queue(self, l, match):
137    pid = match.group(1)
138    start_time = float(match.group(2))*1000 #ms
139    major = int(match.group(3))
140    minor =  int(match.group(4))
141    devNum = major * DEV_MAJOR_MULTIPLIER + minor;
142    operation =  match.group(5)
143    block_num = int(match.group(6))
144    size = int(match.group(7))
145    process = match.group(8) + "-" + pid
146    event = RwEvent(block_num, start_time, size)
147    io = self.ios.get(process)
148    if not io:
149      io = ProcessData(process)
150      self.ios[process] = io
151    if major == DM_MAJOR:
152      devNum = major * DEV_MAJOR_MULTIPLIER + minor;
153      if 'R' in operation[0]:
154        if devNum not in self.total_dm_reads:
155          self.total_dm_reads[devNum] = 0
156        self.total_dm_reads[devNum] += size
157        io.add_dm_read(size)
158      elif 'W' in operation[0]:
159        if devNum not in self.total_dm_writes:
160          self.total_dm_writes[devNum] = 0
161        self.total_dm_writes[devNum] += size
162        io.add_dm_write(size)
163      return
164    if 'R' in operation[0]:
165      io.add_read_event(major, minor, event)
166      self.total_reads += size
167      per_device = self.total_reads_per_device.get(devNum)
168      if not per_device:
169        self.total_reads_per_device[devNum] = 0
170      self.total_reads_per_device[devNum] += size
171    elif 'W' in operation[0]:
172      io.add_write_event(major, minor, event)
173      self.total_writes += size
174      per_device = self.total_writes_per_device.get(devNum)
175      if not per_device:
176        self.total_writes_per_device[devNum] = 0
177      self.total_writes_per_device[devNum] += size
178
179  def dump(self):
180    print "total read blocks,", self.total_reads
181    print "total write blocks,", self.total_writes
182    print "Total DM R"
183    for dev,size in self.total_dm_reads.items():
184      print dev, size
185    print "Total DM W"
186    for dev,size in self.total_dm_writes.items():
187      print dev, size
188    print "**Process total R/W"
189    sorted_by_total_rw = collections.OrderedDict(sorted(self.ios.items(), \
190      key = lambda item: item[1].total_reads + item[1].total_writes, reverse = True))
191    for i in range(MAX_PROCESS_DUMP):
192      process = sorted_by_total_rw.popitem(last=False)
193      if not process:
194        break
195      process[1].dump()
196
197    print "**Process total W"
198    sorted_by_total_w = collections.OrderedDict(sorted(self.ios.items(), \
199      key = lambda item: item[1].total_writes, reverse = True))
200    for i in range(5):
201      process = sorted_by_total_w.popitem(last=False)
202      if not process:
203        break
204      process[1].dump()
205
206    print "**Device total R"
207    sorted_by_total_r = collections.OrderedDict(sorted(self.total_reads_per_device.items(), \
208      key = lambda item: item[1], reverse = True))
209    for i in range(len(sorted_by_total_r)):
210      dev = sorted_by_total_r.popitem(last=False)
211      print dev[0],dev[1]
212
213    print "**Device total W"
214    sorted_by_total_w = collections.OrderedDict(sorted(self.total_writes_per_device.items(), \
215      key = lambda item: item[1], reverse = True))
216    for i in range(len(sorted_by_total_w)):
217      dev = sorted_by_total_w.popitem(last=False)
218      print dev[0],dev[1]
219
220class SchedProcess:
221  def __init__(self, pid):
222    self.pid = pid
223    self.name = "unknown"
224    self.total_execution_time = 0.0
225    self.total_io_wait_time = 0.0
226    self.total_other_wait_time = 0.0
227    self.waiting_calls = {} # k: waiting_call, v : waiting counter
228    self.io_waiting_call_times = {} # k: waiting_call, v: total wait time
229    self.in_iowait = False
230    self.last_waiting_call = None
231    self.last_switch_out_time = 0.0
232    self.last_switch_in_time = 0.0
233    self.last_core = -1
234    self.execution_time_per_core = {} # k: core, v : time
235    self.io_latency_histograms = {} # k : delay in ms, v : count
236
237  def handle_reason(self, current_time, iowait, waiting_call):
238    #if self.pid == 1232:
239    #  print current_time, iowait, waiting_call
240    if iowait == 1:
241      self.in_iowait = True
242    self.last_waiting_call = waiting_call
243    call_counter = self.waiting_calls.get(waiting_call, 0)
244    call_counter += 1
245    self.waiting_calls[waiting_call] = call_counter
246
247  def handle_switch_out(self, current_time, out_state, priority, name, core):
248    #if self.pid == 1232:
249    #  print "out", current_time, out_state
250    if self.name != name:
251      self.name = name
252    self.last_switch_out_time = current_time
253    if self.last_switch_in_time == 0.0: # switch in not recorded. ignore this one
254      return
255    execution_time = current_time - self.last_switch_in_time
256    self.total_execution_time += execution_time
257    core_execution_time = self.execution_time_per_core.get(core, 0.0)
258    core_execution_time += execution_time
259    self.execution_time_per_core[core] = core_execution_time
260
261  def handle_switch_in(self, current_time, priority, name, core):
262    #if self.pid == 1232:
263    #  print "in", current_time, self.in_iowait
264    if self.name != name:
265      self.name = name
266    self.last_switch_in_time = current_time
267    if self.last_switch_out_time == 0.0: # in without out, probably 1st
268      self.in_iowait = False
269      return
270    wait_time = current_time - self.last_switch_out_time
271    if self.in_iowait:
272      self.total_io_wait_time += wait_time
273      total_waiting_call_time = self.io_waiting_call_times.get(self.last_waiting_call, 0.0)
274      total_waiting_call_time += wait_time
275      self.io_waiting_call_times[self.last_waiting_call] = total_waiting_call_time
276      wait_time_ms = int(wait_time*10) / 10.0 # resolution up to 0.1 ms
277      histogram_count = self.io_latency_histograms.get(wait_time_ms, 0)
278      histogram_count += 1
279      self.io_latency_histograms[wait_time_ms] = histogram_count
280    else:
281      self.total_other_wait_time += wait_time
282    self.in_iowait = False
283
284
285  def dump(self):
286    print "PID:", self.pid, " name:", self.name
287    print " total execution time:", self.total_execution_time,\
288      " io wait:", self.total_io_wait_time, " other wait:", self.total_other_wait_time
289    sorted_data = collections.OrderedDict(sorted(self.execution_time_per_core.items(), \
290      key = lambda item: item[0], reverse = False))
291    print " Core execution:", sorted_data
292    sorted_data = collections.OrderedDict(sorted(self.waiting_calls.items(), \
293      key = lambda item: item[1], reverse = True))
294    print " Wait calls:", sorted_data
295    sorted_data = collections.OrderedDict(sorted(self.io_waiting_call_times.items(), \
296      key = lambda item: item[1], reverse = True))
297    print " IO Wait time per wait calls:", sorted_data
298    sorted_data = collections.OrderedDict(sorted(self.io_latency_histograms.items(), \
299      key = lambda item: item[0], reverse = False))
300    print " Wait time histogram:", sorted_data
301
302class SchedTrace:
303  def __init__(self):
304    self.re_switch = re.compile(RE_SCHED_SWITCH)
305    self.re_reason = re.compile(RE_SCHED_BLOCKED_READSON)
306    self.processes = {} # key: pid, v : SchedProcess
307
308  def parse(self, l):
309    checked_reason = False
310    match = self.re_switch.match(l)
311    if not match:
312      match = self.re_reason.match(l)
313      checked_reason = True
314    if not match:
315      return False
316    try:
317      if checked_reason:
318        self.do_handle_reason(l, match)
319      else:
320        self.do_handle_switch(l, match)
321    except ValueError:
322      print "cannot parse:", l
323      raise
324    return True
325
326  def do_handle_switch(self, l, match):
327    current_pid = int(match.group(1))
328    cpu_core = int(match.group(2))
329    current_time = float(match.group(3))*1000 #ms
330    out_name = match.group(4)
331    out_pid = int(match.group(5))
332    out_prio = int(match.group(6))
333    out_state = match.group(7)
334    in_name = match.group(8)
335    in_pid = int(match.group(9))
336    in_prio = int(match.group(10))
337    out_process = self.processes.get(out_pid)
338    if not out_process:
339      out_process = SchedProcess(out_pid)
340      self.processes[out_pid] = out_process
341    in_process = self.processes.get(in_pid)
342    if not in_process:
343      in_process = SchedProcess(in_pid)
344      self.processes[in_pid] = in_process
345    out_process.handle_switch_out(current_time, out_state, out_prio, out_name, cpu_core)
346    in_process.handle_switch_in(current_time, in_prio, in_name, cpu_core)
347
348  def do_handle_reason(self, l, match):
349    current_pid = int(match.group(1))
350    cpu_core = int(match.group(2))
351    current_time = float(match.group(3))*1000 #ms
352    pid = int(match.group(4))
353    iowait = int(match.group(5))
354    waiting_call = match.group(6)
355    process = self.processes.get(pid)
356    if not process:
357      process = SchedProcess(pid)
358      self.processes[pid] = process
359    process.handle_reason(current_time, iowait, waiting_call)
360
361  def dump(self):
362    sorted_by_total_execution = collections.OrderedDict(sorted(self.processes.items(), \
363      key = lambda item: item[1].total_io_wait_time, reverse = True))
364    for k, v in sorted_by_total_execution.iteritems():
365      if v.total_execution_time > 10.0 or v.total_io_wait_time != 0.0:
366        v.dump()
367
368def main(argv):
369  if (len(argv) < 2):
370    print "check_io_trace_all.py filename"
371    return
372  filename = argv[1]
373
374  io_trace = IoTrace()
375  sched_trace = SchedTrace()
376  with open(filename) as f:
377    for l in f:
378      if io_trace.parse(l):
379        continue
380      sched_trace.parse(l)
381  io_trace.dump()
382  print "\n\n\n"
383  sched_trace.dump()
384
385if __name__ == '__main__':
386  main(sys.argv)
387