1#!/usr/bin/env python3
2#
3#   Copyright 2018 - The Android Open Source Project
4#
5#   Licensed under the Apache License, Version 2.0 (the "License");
6#   you may not use this file except in compliance with the License.
7#   You may obtain a copy of the License at
8#
9#       http://www.apache.org/licenses/LICENSE-2.0
10#
11#   Unless required by applicable law or agreed to in writing, software
12#   distributed under the License is distributed on an "AS IS" BASIS,
13#   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14#   See the License for the specific language governing permissions and
15#   limitations under the License.
16
17"""Processes profiling data to output latency numbers."""
18#
19# Type "python latency.py -h" for help
20#
21# Currently the log data is assumed to be in the following format:
22# PROF:<event-id> <timestamp>
23# The <event-id> and <timestamp> can be in the form of any valid
24# (positive)integer literal in Python
25# Examples:
26#   PROF:0x0006 0x00000155e0d043f1
27#   PROF:6 1468355593201
28
29import argparse
30from collections import defaultdict
31import csv
32import logging
33import math
34import os
35import string
36import xml.etree.ElementTree as ET
37
38valid_fname_chars = '-_.()%s%s' % (string.ascii_letters, string.digits)
39PERCENTILE_STEP = 1
40PROFILER_DATA_PREFIX = 'PROF:'
41
42
43class EventPair(object):
44
45    def __init__(self, pair_id, latency, name):
46        self.pair_id = pair_id
47        self.latency = latency
48        self.name = name
49
50
51class LatencyEntry(object):
52
53    def __init__(self, start_timestamp, latency):
54        self.start_timestamp = start_timestamp
55        self.latency = latency
56
57
58def parse_xml(xml_file):
59    """
60    Parse the configuration xml file.
61
62    Returns:
63      event_pairs_by_pair_id: dict mapping event id to event pair object
64      event_pairs_by_start_id: dict mapping starting event to list of event pairs
65                               with that starting event.
66      event_pairs_by_end_id: dict mapping ending event to list of event pairs
67                             with that ending event.
68    """
69    root = ET.parse(xml_file).getroot()
70    event_pairs = root.findall('event-pair')
71    event_pairs_by_pair_id = {}
72    event_pairs_by_start_id = defaultdict(list)
73    event_pairs_by_end_id = defaultdict(list)
74
75    for event_pair in event_pairs:
76        start_evt = root.find(
77            "./event[@id='{0:}']".format(event_pair.attrib['start-event']))
78        end_evt = root.find(
79            "./event[@id='{0:}']".format(event_pair.attrib['end-event']))
80        start = int(start_evt.attrib['id'], 0)
81        end = int(end_evt.attrib['id'], 0)
82        paird_id = start << 32 | end
83        if paird_id in event_pairs_by_pair_id:
84            logging.error('Latency event repeated: start id = %d, end id = %d',
85                          start,
86                          end)
87            continue
88        # Create the output file name base by concatenating:
89        # "input file name base" + start event name + "_to_" + end event name
90        evt_pair_name = start_evt.attrib['name'] + '_to_' + end_evt.attrib[
91            'name']
92        evt_pair_name = [
93            c if c in valid_fname_chars else '_' for c in evt_pair_name
94        ]
95        evt_pair_name = ''.join(evt_pair_name)
96        evt_list = EventPair(paird_id, 0, evt_pair_name)
97        event_pairs_by_pair_id[paird_id] = evt_list
98        event_pairs_by_start_id[start].append(evt_list)
99        event_pairs_by_end_id[end].append(evt_list)
100    return (event_pairs_by_pair_id, event_pairs_by_start_id,
101            event_pairs_by_end_id)
102
103
104def percentile_to_index(num_entries, percentile):
105    """
106    Returns the index in an array corresponding to a percentile.
107
108    Arguments:
109      num_entries: the number of entries in the array.
110      percentile: which percentile to calculate the index for.
111    Returns:
112      ind: the index in the array corresponding to the percentile.
113    """
114    ind = int(math.floor(float(num_entries) * percentile / 100))
115    if ind > 0:
116        ind -= 1
117    return ind
118
119
120def compute_latencies(input_file, event_pairs_by_start_id,
121                      event_pairs_by_end_id):
122    """Parse the input data file and compute latencies."""
123    line_num = 0
124    lat_tables_by_pair_id = defaultdict(list)
125    while True:
126        line_num += 1
127        line = input_file.readline()
128        if not line:
129            break
130        data = line.partition(PROFILER_DATA_PREFIX)[2]
131        if not data:
132            continue
133        try:
134            event_id, timestamp = [int(x, 0) for x in data.split()]
135        except ValueError:
136            logging.error('Badly formed event entry at line #%s: %s', line_num,
137                          line)
138            continue
139        # We use event_pair.latency to temporarily store the timestamp
140        # of the start event
141        for event_pair in event_pairs_by_start_id[event_id]:
142            event_pair.latency = timestamp
143        for event_pair in event_pairs_by_end_id[event_id]:
144            # compute the latency only if we have seen the corresponding
145            # start event already
146            if event_pair.latency:
147                lat_tables_by_pair_id[event_pair.pair_id].append(
148                    LatencyEntry(event_pair.latency,
149                                 timestamp - event_pair.latency))
150                event_pair.latency = 0
151    return lat_tables_by_pair_id
152
153
154def write_data(fname_base, event_pairs_by_pair_id, lat_tables_by_pair_id):
155    for event_id, lat_table in lat_tables_by_pair_id.items():
156        event_pair = event_pairs_by_pair_id[event_id]
157        with open(fname_base + '_' + event_pair.name + '_data.csv',
158                  'wb') as out_file:
159            csv_writer = csv.writer(out_file)
160            for dat in lat_table:
161                csv_writer.writerow([dat.start_timestamp, dat.latency])
162
163
164def write_summary(fname_base, event_pairs_by_pair_id, lat_tables_by_pair_id):
165    summaries = get_summaries(event_pairs_by_pair_id, lat_tables_by_pair_id)
166    for event_id, lat_table in lat_tables_by_pair_id.items():
167        event_pair = event_pairs_by_pair_id[event_id]
168        summary = summaries[event_pair.name]
169        latencies = summary['latencies']
170        num_latencies = summary['num_latencies']
171        with open(fname_base + '_' + event_pair.name + '_summary.txt',
172                  'wb') as out_file:
173            csv_writer = csv.writer(out_file)
174            csv_writer.writerow(['Percentile', 'Latency'])
175            # Write percentile table
176            for percentile in range(1, 101):
177                ind = percentile_to_index(num_latencies, percentile)
178                csv_writer.writerow([percentile, latencies[ind]])
179
180            # Write summary
181            print('\n\nTotal number of samples = {}'.format(num_latencies),
182                  file=out_file)
183            print('Min = {}'.format(summary['min_lat']), file=out_file)
184            print('Max = {}'.format(summary['max_lat']), file=out_file)
185            print('Average = {}'.format(summary['average_lat']), file=out_file)
186            print('Median = {}'.format(summary['median_lat']), file=out_file)
187            print('90 %ile = {}'.format(summary['90pctile']), file=out_file)
188            print('95 %ile = {}'.format(summary['95pctile']), file=out_file)
189
190
191def process_latencies(config_xml, input_file):
192    """
193    End to end function to compute latencies and summaries from input file.
194    Writes latency results to files in current directory.
195
196    Arguments:
197       config_xml: xml file specifying which event pairs to compute latency
198                   btwn.
199       input_file: text file containing the timestamped events, like a log file.
200    """
201    # Parse the event configuration file
202    (event_pairs_by_pair_id, event_pairs_by_start_id,
203     event_pairs_by_end_id) = parse_xml(config_xml)
204    # Compute latencies
205    lat_tables_by_pair_id = compute_latencies(input_file,
206                                              event_pairs_by_start_id,
207                                              event_pairs_by_end_id)
208    fname_base = os.path.splitext(os.path.basename(input_file.name))[0]
209    # Write the latency data and summary to respective files
210    write_data(fname_base, event_pairs_by_pair_id, lat_tables_by_pair_id)
211    write_summary(fname_base, event_pairs_by_pair_id, lat_tables_by_pair_id)
212
213
214def get_summaries(event_pairs_by_pair_id, lat_tables_by_pair_id):
215    """
216    Process significant summaries from a table of latencies.
217
218    Arguments:
219      event_pairs_by_pair_id: dict mapping event id to event pair object
220      lat_tables_by_pair_id: dict mapping event id to latency table
221    Returns:
222      summaries: dict mapping event pair name to significant summary metrics.
223    """
224    summaries = {}
225    for event_id, lat_table in lat_tables_by_pair_id.items():
226        event_summary = {}
227        event_pair = event_pairs_by_pair_id[event_id]
228        latencies = [entry.latency for entry in lat_table]
229        latencies.sort()
230        event_summary['latencies'] = latencies
231        event_summary['num_latencies'] = len(latencies)
232        event_summary['min_lat'] = latencies[0]
233        event_summary['max_lat'] = latencies[-1]
234        event_summary['average_lat'] = sum(latencies) / len(latencies)
235        event_summary['median'] = latencies[len(latencies) // 2]
236        event_summary['90pctile'] = latencies[percentile_to_index(
237            len(latencies), 90)]
238        event_summary['95pctile'] = latencies[percentile_to_index(
239            len(latencies), 95)]
240        summaries[event_pair.name] = event_summary
241    return summaries
242
243
244def get_summaries_from_log(input_file_name, config_xml=None):
245    """
246    End to end function to compute latencies and summaries from input file.
247    Returns a summary dictionary.
248
249    Arguments:
250      input_file_name: text file containing the timestamped events, like a
251                       log file.
252      config_xml: xml file specifying which event pairs to compute latency btwn.
253    Returns:
254      summaries: dict mapping event pair name to significant summary metrics.
255    """
256    config_xml = config_xml or os.path.join(os.path.dirname(__file__),
257                                            'latency.xml')
258    (event_pairs_by_pair_id, event_pairs_by_start_id,
259     event_pairs_by_end_id) = parse_xml(config_xml)
260    # Compute latencies
261    input_file = open(input_file_name, 'r')
262    lat_tables_by_pair_id = compute_latencies(input_file,
263                                              event_pairs_by_start_id,
264                                              event_pairs_by_end_id)
265    return get_summaries(event_pairs_by_pair_id, lat_tables_by_pair_id)
266
267
268if __name__ == '__main__':
269    # Parse command-line arguments
270    parser = argparse.ArgumentParser(
271        description='Processes profiling data to output latency numbers')
272    parser.add_argument(
273        '--events-config',
274        type=argparse.FileType('r'),
275        default=os.path.join(os.path.dirname(__file__), 'latency.xml'),
276        help='The configuration XML file for events.'
277             ' If not specified uses latency.xml from current folder')
278    parser.add_argument(
279        'input', type=argparse.FileType('r'), help='The input log')
280    args = parser.parse_args()
281    process_latencies(args.events_config, args.input)
282