1#!/usr/bin/env python
2#
3# Copyright (C) 2016 The Android Open Source Project
4#
5# Licensed under the Apache License, Version 2.0 (the "License");
6# you may not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9#      http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16#
17"""Analyze ext4 trace with custom open trace"""
18import collections
19import math
20import os
21import re
22import string
23import sys
24
# When True, print a diagnostic line for every trace event that is ignored.
DBG = False

# Hard-coded maps between a device id (as it appears in the "dev" field of the
# ext4 trace) and the path prefix of the partition stored on it. The mapping
# differs from device to device; these values work for marlin.
DEVICE_TO_PARTITION = { "253,0": "/system/", "253,1": "/vendor/", "259,19": "/data/" }
# Reverse lookup: partition path prefix -> device id.
# .items() (rather than the Python 2-only .iteritems()) keeps this importable
# under both Python 2.7 and Python 3.
PARTITION_TO_DEVICE = {partition: device
                       for device, partition in DEVICE_TO_PARTITION.items()}

# Matches a do_sys_open trace line.
# Groups: 1 = timestamp, 2 = process name, 3 = file name, 4 = open flags,
#         5 = inode.
RE_DO_SYS_OPEN = r""".+\s+([0-9]+\.[0-9]+):\s+do_sys_open:\s+(\S+):\sopen..(\S+).,\s([0-9]+).\s+.+inode\s=\s([0-9]+)"""
# Matches an ext4_ext_map_blocks_enter trace line.
# Groups: 1 = process name, 2 = pid, 3 = timestamp, 4 = device id, 5 = inode,
#         6 = first logical block (lblk), 7 = number of blocks (len).
RE_EXT4_MA_BLOCKS_ENTER = r"""\s+(\S+)-([0-9]+).+\s+([0-9]+\.[0-9]+):\s+ext4_ext_map_blocks_enter:\s+dev\s+(\S+)\s+ino\s+([0-9]+)\s+lblk\s+([0-9]+)\s+len\s+([0-9]+)"""
36
class FileEvent:
  """Accumulated open and read activity for one file.

  Attributes:
    file_name: file name given at construction.
    inode: inode identifier given at construction (stored as passed in).
    processes: list of (open_time, process_name, flags), one per recorded open.
    reads: list of (time, offset, size, process_name), one per recorded read.
    total_reads: sum of the sizes of all recorded reads.
    total_open: number of recorded opens; construction counts as the first.
    blocks: block index -> number of recorded reads that covered that block.
    total_rereads: number of block accesses that hit an already-read block.
    read_size_histogram: read size -> number of reads of that size.
    single_block_reads: process name -> number of size-1 reads it issued.
  """

  def __init__(self, open_time, file_name, process_name, inode, flags):
    """Create the record and register the first open of the file."""
    self.file_name = file_name
    self.inode = inode
    self.processes = [(open_time, process_name, flags)]
    self.reads = []
    self.total_reads = 0
    self.total_open = 1
    self.blocks = {}
    self.total_rereads = 0
    self.read_size_histogram = {} # key: read size, value: occurrence
    self.single_block_reads = {} # key: process name, value: occurrence

  def add_open(self, open_time, process_name, flags):
    """Record one more open of this file."""
    self.processes.append((open_time, process_name, flags))
    self.total_open += 1

  def add_read(self, time, offset, size, process_name):
    """Record a read of `size` blocks starting at block index `offset`.

    Every block in [offset, offset + size) has its hit count incremented; a
    block that had been read before also counts towards total_rereads.
    """
    self.reads.append((time, offset, size, process_name))
    self.total_reads += size
    for block in range(offset, offset + size):
      hits = self.blocks.get(block, 0)
      if hits:
        self.total_rereads += 1
      self.blocks[block] = hits + 1
    self.read_size_histogram[size] = self.read_size_histogram.get(size, 0) + 1
    if size == 1:
      self.single_block_reads[process_name] = \
        self.single_block_reads.get(process_name, 0) + 1

  def dump(self):
    """Print a summary of this file's activity to stdout.

    Each print() call takes one pre-formatted string so the output is the same
    under Python 2 (where the parentheses are plain grouping) and Python 3.
    """
    print(" filename %s, total reads %d, total open %d total rereads %d inode %s" \
      % (self.file_name, self.total_reads, self.total_open, self.total_rereads, self.inode))
    process_names = [opener[1] for opener in self.processes]
    print("  Processes opened this file: %s" % ','.join(process_names))
    # The histogram is only shown when more than one distinct read size was seen.
    if len(self.read_size_histogram) > 1:
      by_size = collections.OrderedDict( \
        sorted(self.read_size_histogram.items(), key = lambda item: item[0]))
      print("  Read size histograms: %s" % (by_size,))
    # Single-block readers are listed most frequent first, and only when several
    # processes issued such reads.
    if len(self.single_block_reads) > 1 and len(self.reads) > 1:
      by_count = collections.OrderedDict( \
        sorted(self.single_block_reads.items(), key = lambda item: item[1], reverse = True))
      print("  Single block reads: %s" % (by_count,))
87
class Trace:
  """Collects per-file open and read events from trace lines, grouped by device.

  Attributes:
    files_per_device: device id -> { inode -> FileEvent }.
    re_open: compiled RE_DO_SYS_OPEN.
    re_read: compiled RE_EXT4_MA_BLOCKS_ENTER.
  """

  def __init__(self):
    self.files_per_device = {} # key: device, value: { key: inode, value: FileEvent }
    self.re_open = re.compile(RE_DO_SYS_OPEN)
    self.re_read = re.compile(RE_EXT4_MA_BLOCKS_ENTER)

  def handle_line(self, line):
    """Dispatch one trace line to the open or read handler; ignore other lines."""
    match = self.re_open.match(line)
    if match:
      self.handle_open(match)
      return
    match = self.re_read.match(line)
    if match:
      self.handle_read(match)

  def handle_open(self, match):
    """Record an open event from a RE_DO_SYS_OPEN match.

    Opens of files that are not under one of the known partition prefixes are
    ignored.
    """
    time = match.group(1)
    process_name = match.group(2)
    file_name = match.group(3)
    flag = match.group(4)
    inode = match.group(5)
    dev_name = None
    for p in PARTITION_TO_DEVICE:
      if file_name.startswith(p):
        dev_name = PARTITION_TO_DEVICE[p]
    if not dev_name:
      if DBG:
        print("Ignore open for file %s" % file_name)
      return
    # An open is normally seen before any read on its device, so the per-device
    # table may not exist yet; create it on demand instead of raising KeyError.
    files = self.files_per_device.setdefault(dev_name, {})
    fevent = files.get(inode)
    if not fevent:
      files[inode] = FileEvent(time, file_name, process_name, inode, flag)
    else:
      fevent.add_open(time, process_name, flag)

  def handle_read(self, match):
    """Record a read event from a RE_EXT4_MA_BLOCKS_ENTER match.

    Reads on devices missing from DEVICE_TO_PARTITION are ignored. A read for
    an inode with no recorded open is stored under the file name "unknown".
    """
    process_name = match.group(1)
    pid = match.group(2)
    time = match.group(3)
    dev = match.group(4)
    inode = match.group(5)
    offset = int(match.group(6))
    size = int(match.group(7))
    files = self.files_per_device.get(dev)
    if not files:
      if not DEVICE_TO_PARTITION.get(dev):
        if DBG:
          print("read ignored for device %s" % dev)
        return
      files = {}
      self.files_per_device[dev] = files
    fevent = files.get(inode)
    if not fevent:
      if DBG:
        print('no open for device %s with inode %s' % (dev, inode))
      fevent = FileEvent(time, "unknown", process_name, inode, 0)
      files[inode] = fevent
    fevent.add_read(time, offset, size, process_name + "-" + pid)


  def dump_partition(self, partition_name, files):
    """Print every file of one partition, largest total_reads first.

    Args:
      partition_name: label printed in the partition header.
      files: { inode -> FileEvent } for the partition.

    Returns:
      Tuple (total_reads, total_rereads, number_of_files) for the partition.
    """
    print("**Dump partition: %s total number of files: %d" % (partition_name, len(files)))
    total_reads = 0
    total_rereads = 0
    # sorted() works on both the Python 2 list and the Python 3 dict view.
    for f in sorted(files.values(), key=lambda f : f.total_reads, reverse = True):
      f.dump()
      total_reads += f.total_reads
      total_rereads += f.total_rereads
    print(" Total reads for partition %d rereads %d" % (total_reads, total_rereads))
    return total_reads, total_rereads, len(files)


  def dump(self):
    """Print the per-partition details followed by an overall summary."""
    print("Dump read per each partition")
    total_reads = 0
    total_rereads = 0
    summaries = []
    for d in self.files_per_device:
      partition = DEVICE_TO_PARTITION[d]
      reads, rereads, num_files = self.dump_partition(partition, \
        self.files_per_device[d])
      total_reads += reads
      total_rereads += rereads
      summaries.append((partition, reads, rereads, num_files))
    print("**Summary**")
    print("Total blocks read %d reread %d" % (total_reads, total_rereads))
    print("Partition total_reads total_rereads num_files")
    for s in summaries:
      print("%s %d %d %d" % s)
182
def main(argv):
  """Analyze the trace file named by argv[1] and print the report to stdout.

  Args:
    argv: command-line argument list; argv[0] is the script path and argv[1]
      the trace file to analyze. With fewer than two entries a usage line is
      printed and nothing else happens.
  """
  if len(argv) < 2:
    # Take the script name from argv[0] so the usage line always matches the
    # name the tool was invoked under; fall back to a fixed name when argv[0]
    # is missing or empty.
    script_name = os.path.basename(argv[0]) if argv and argv[0] else "check_file_read.py"
    print("%s filename" % script_name)
    return
  filename = argv[1]
  trace = Trace()
  with open(filename) as f:
    for l in f:
      trace.handle_line(l)
  trace.dump()
193
# Run the analysis only when executed as a script, not when imported.
if __name__ == "__main__":
  main(sys.argv)
196