1#!/usr/bin/python
2"""
Postprocessing module for IOzone. It picks up the results of an IOzone
run, calculates the geometric mean of all throughput results for a given
file size or record size, and then generates a series of 2D and 3D graphs.
The graph generation functionality depends on gnuplot, and if it is not
present, functionality degrades gracefully.
8
9@copyright: Red Hat 2010
10"""
11import os, sys, optparse, logging, math, time
12import common
13from autotest_lib.client.common_lib import logging_config, logging_manager
14from autotest_lib.client.common_lib import error
15from autotest_lib.client.bin import utils, os_dep
16
17
18_LABELS = ['file_size', 'record_size', 'write', 'rewrite', 'read', 'reread',
19           'randread', 'randwrite', 'bkwdread', 'recordrewrite', 'strideread',
20           'fwrite', 'frewrite', 'fread', 'freread']
21
22
def unique(list):
    """
    Return the distinct elements of list, in order of first occurrence.

    A real list object is always returned (never a dictionary view), so the
    result can be sorted in place or indexed on any Python version.

    @param list: Iterable with hashable values.
    @return: List with non duplicate elements, or None if any of the
            elements is not hashable.
    """
    # NOTE: the parameter name shadows the builtin; it is kept so callers
    # passing it by keyword keep working. As a consequence the builtin
    # list() must not be used inside this function.
    seen = set()
    result = []
    try:
        for item in list:
            if item not in seen:
                seen.add(item)
                result.append(item)
    except TypeError:
        # Unhashable element: keep the historical "return None" contract.
        return None
    return result
41
42
def geometric_mean(values):
    """
    Evaluates the geometric mean for a list of numeric values.

    Every value is converted with int() before the mean is computed. The
    mean is evaluated through logarithms, exp(sum(log(x)) / n), so that the
    intermediate product can not grow out of bounds.

    @param values: List with values.
    @return: Single value representing the geometric mean for the list values.
            None is returned when the list is empty, when a value can not be
            converted to an integer, or when a value is negative (the mean
            is undefined in that case). 0.0 is returned when at least one
            value is zero, since the product of the values is then zero.
    @see: http://en.wikipedia.org/wiki/Geometric_mean
    """
    try:
        values = [int(value) for value in values]
    except (ValueError, TypeError):
        return None
    if not values:
        return None
    smallest = min(values)
    if smallest < 0:
        return None
    if smallest == 0:
        # math.log(0) raises ValueError; the mathematical answer is 0.
        return 0.0
    log_sum = sum([math.log(x) for x in values])
    return math.exp(log_sum / len(values))
60
61
def compare_matrices(matrix1, matrix2, treshold=0.05):
    """
    Compare 2 matrices nxm and return a matrix nxm with comparison data

    Each cell of matrix2 is divided by the matching cell of matrix1. The
    resulting cell in the comparison matrix is:

    * the percent difference as a (negative) float, when the ratio is below
      1 - treshold (counted as a regression);
    * the percent difference as a string prefixed with "+", when the ratio
      is above 1 + treshold (counted as an improvement);
    * otherwise the reference value itself for the first column of a line
      and "." for any other column (counted as unchanged).

    A reference cell equal to zero can not be used as a divisor: if the
    compared cell is zero as well the pair is considered unchanged,
    otherwise it is reported as an infinite improvement ("+inf").

    @param matrix1: Reference Matrix with numeric data
    @param matrix2: Matrix that will be compared
    @param treshold: Any difference bigger than this percent treshold will be
            reported.
    @return: Tuple (comparison matrix, number of improvements, number of
            regressions, total number of cells compared).
    """
    improvements = 0
    regressions = 0
    same = 0

    new_matrix = []
    for line1, line2 in zip(matrix1, matrix2):
        new_line = []
        for column, (element1, element2) in enumerate(zip(line1, line2)):
            reference = float(element1)
            candidate = float(element2)
            if reference != 0:
                ratio = candidate / reference
            elif candidate == 0:
                ratio = 1.0
            else:
                ratio = float('inf')

            if ratio < (1 - treshold):
                regressions += 1
                new_line.append(100 * ratio - 100)
            elif ratio > (1 + treshold):
                improvements += 1
                new_line.append("+" + str(100 * ratio - 100))
            else:
                same += 1
                # Use the position, not the value, to spot the first column:
                # a later cell may hold the same value as the first one.
                if column == 0:
                    new_line.append(element1)
                else:
                    new_line.append(".")
        new_matrix.append(new_line)

    total = improvements + regressions + same

    return (new_matrix, improvements, regressions, total)
98
99
class IOzoneAnalyzer(object):
    """
    Analyze an unprocessed IOzone file, and generate the following types of
    report:

    * Summary of throughput for all file and record sizes combined
    * Summary of throughput for all file sizes
    * Summary of throughput for all record sizes

    If exactly two files are provided to the analyzer object, a comparison
    between the two runs is made, searching for regressions in performance.
    """
    # Number of whitespace separated integer columns of an IOzone result
    # line: file size, record size and the throughput figures. Must stay in
    # sync with the module level _LABELS list.
    _N_COLUMNS = 15
    _N_THROUGHPUT = _N_COLUMNS - 2

    # Row layouts used by the different tables.
    _OVERALL_ROW_FORMAT = "ALL            " + "%-8s" * _N_THROUGHPUT
    _SIZE_ROW_FORMAT = "%-10s" + "%-8s" * _N_THROUGHPUT
    _DIFF_ROW_FORMAT = "%-10s" + "%-8.6s" * _N_THROUGHPUT

    # Column header shared by the drilled data and the comparison tables.
    _DRILLED_HEADER = (
        "RECORD    INIT    RE              RE    RANDOM  RANDOM  BACKWD   RECRE  STRIDE    F       FRE     F       FRE ",
        "SIZE (KB) WRITE   WRITE   READ    READ    READ   WRITE    READ   WRITE    READ    WRITE   WRITE   READ    READ",
        "--------------------------------------------------------------------------------------------------------------",
    )


    def __init__(self, list_files, output_dir):
        """
        @param list_files: List of paths of IOzone results files.
        @param output_dir: Directory where the data files are written. It is
                created when it does not exist.
        """
        self.list_files = list_files
        if not os.path.isdir(output_dir):
            os.makedirs(output_dir)
        self.output_dir = output_dir
        logging.info("Results will be stored in %s", output_dir)


    def average_performance(self, results, size=None):
        """
        Flattens a list containing performance results.

        For every throughput column the geometric mean over all lines is
        computed, divided by 1024 and truncated to an integer.

        @param results: List of n lists containing data from performance runs.
        @param size: Numerical value of a size (say, file_size) that was used
                to filter the original results list. When given, it becomes
                the first element of the returned line.
        @return: List with the average data from the performance run.
        @raise TypeError: If the geometric mean can not be computed, which
                happens for instance when results is empty.
        """
        average_line = []
        if size is not None:
            average_line.append(size)
        # The first two columns are the file and record sizes, not throughput.
        for i in range(2, self._N_COLUMNS):
            average = geometric_mean([line[i] for line in results]) / 1024.0
            average_line.append(int(average))
        return average_line


    def process_results(self, results, label=None):
        """
        Process a list of IOzone results according to label.

        @param results: A list of n x m columns with original iozone results.
        @param label: IOzone column label that we'll use to filter and compute
                geometric mean results, in practical term either 'file_size'
                or 'record_size'. When None, all lines are averaged together.
        @return: A list of lines with geometric averages, one line for each
                distinct value of the label column (ex, one average line for
                each file size), sorted by that value. A single line when no
                label is given.
        """
        if label is None:
            return [self.average_performance(results)]

        index = _LABELS.index(label)
        # sorted() copes with any iterable unique() may hand back and leaves
        # that object untouched.
        sizes = sorted(unique([line[index] for line in results]))
        performance = []
        for size in sizes:
            r_results = [line for line in results if line[index] == size]
            performance.append(self.average_performance(r_results, size))
        return performance


    def parse_file(self, file):
        """
        Parse an IOzone results file.

        Only lines made of exactly 15 integer fields are kept, everything
        else (headers, banners, partial lines) is skipped.

        @param file: File object that will be parsed.
        @return: Matrix containing IOzone results extracted from the file.
        """
        lines = []
        for line in file:
            fields = line.split()
            if len(fields) != self._N_COLUMNS:
                continue
            try:
                lines.append([int(i) for i in fields])
            except ValueError:
                continue
        return lines


    def _log_table_header(self, title):
        """
        Logs a table title followed by the drilled data column header.

        @param title: Title line of the table.
        """
        logging.info("")
        logging.info(title)
        logging.info("")
        for header_line in self._DRILLED_HEADER:
            logging.info(header_line)


    def _log_rows_to_file(self, path, rows):
        """
        Logs table rows, mirroring them without any decoration into path.

        @param path: Data file that will receive the rows. A previous file
                with the same name is removed first.
        @param rows: Lines holding a size followed by the throughput values.
        """
        if os.path.isfile(path):
            os.unlink(path)
        logger = logging.getLogger()
        handler = logging.FileHandler(path)
        handler.setFormatter(logging.Formatter(""))
        logger.addHandler(handler)
        try:
            for row in rows:
                logging.info(self._SIZE_ROW_FORMAT % tuple(row))
        finally:
            # Detaching the handler is not enough, it has to be closed too,
            # otherwise the underlying file stays open.
            logger.removeHandler(handler)
            handler.close()


    def report(self, overall_results, record_size_results, file_size_results):
        """
        Generates analysis data for IOZone run.

        Generates a report to both logs (where it goes with nice headers) and
        output files for further processing (graph generation).

        @param overall_results: Matrix whose lines hold the 13 throughput
                averages computed over all file and record sizes.
        @param record_size_results: Matrix whose lines hold a record size
                followed by the 13 throughput averages for that size.
        @param file_size_results: Matrix whose lines hold a file size
                followed by the 13 throughput averages for that size.
        """
        logging.info("")
        logging.info("TABLE:  SUMMARY of ALL FILE and RECORD SIZES                        Results in MB/sec")
        logging.info("")
        logging.info("FILE & RECORD  INIT    RE              RE    RANDOM  RANDOM  BACKWD   RECRE  STRIDE    F       FRE     F       FRE")
        logging.info("SIZES (KB)     WRITE   WRITE   READ    READ    READ   WRITE    READ   WRITE    READ    WRITE   WRITE   READ    READ")
        logging.info("-------------------------------------------------------------------------------------------------------------------")
        for result_line in overall_results:
            logging.info(self._OVERALL_ROW_FORMAT % tuple(result_line))
        logging.info("")

        logging.info("DRILLED DATA:")

        # The logging system is used to put the output of the analysis into
        # data files: a temporary file handler is attached to the root logger
        # while the rows of each table are being logged.
        # NOTE(review): the per record size table goes to '2d-datasource-file'
        # and the per file size table to '2d-datasource-record'; confirm that
        # this naming is the intended one.
        self._log_table_header("TABLE:  RECORD Size against all FILE Sizes                          Results in MB/sec")
        self._log_rows_to_file(os.path.join(self.output_dir,
                                            '2d-datasource-file'),
                               record_size_results)
        logging.info("")

        self._log_table_header("TABLE:  FILE Size against all RECORD Sizes                          Results in MB/sec")
        self._log_rows_to_file(os.path.join(self.output_dir,
                                            '2d-datasource-record'),
                               file_size_results)
        logging.info("")


    def _percentage(self, count, total):
        """
        Returns count as a percentage of total, 0.0 when total is zero.

        @param count: Number of occurrences.
        @param total: Number of elements inspected.
        """
        if not total:
            return 0.0
        return 100 * count / float(total)


    def _log_comparison(self, title, comparison):
        """
        Logs one comparison table followed by its regression summary.

        @param title: Title line of the table.
        @param comparison: Tuple (matrix, improvements, regressions, total).
        """
        (matrix, improvements, regressions, total) = comparison
        self._log_table_header(title)
        for result_line in matrix:
            logging.info(self._DIFF_ROW_FORMAT % tuple(result_line))
        logging.info("REGRESSIONS: %d (%.2f%%)    Improvements: %d (%.2f%%)",
                     regressions, self._percentage(regressions, total),
                     improvements, self._percentage(improvements, total))
        logging.info("")


    def report_comparison(self, record, file):
        """
        Generates comparison data for 2 IOZone runs.

        Logs one table of differences for the record size data and one for
        the file size data, each followed by the number and percentage of
        regressions and improvements.

        @param record: Tuple with 4 elements containing results for record
                size: (comparison matrix, improvements, regressions, total).
        @param file: Tuple with 4 elements containing results for file size,
                with the same layout.
        """
        logging.info("ANALYSIS of DRILLED DATA:")
        self._log_comparison("TABLE:  RECsize Difference between runs                            Results are % DIFF",
                             record)
        self._log_comparison("TABLE:  FILEsize Difference between runs                           Results are % DIFF",
                             file)


    def analyze(self):
        """
        Analyzes and eventually compares sets of IOzone data.

        Every file is parsed and reported on. Files without any valid IOzone
        result line are reported as an error and skipped. When exactly two
        files were given and both could be analyzed, the runs are compared.
        """
        compare = len(self.list_files) == 2
        record_size = []
        file_size = []
        for path in self.list_files:
            results_file = open(path, 'r')
            try:
                logging.info('FILE: %s', path)
                results = self.parse_file(results_file)
            finally:
                results_file.close()

            if not results:
                logging.error("No IOzone results found in %s, skipping it",
                              path)
                continue

            overall_results = self.process_results(results)
            record_size_results = self.process_results(results, 'record_size')
            file_size_results = self.process_results(results, 'file_size')
            self.report(overall_results, record_size_results, file_size_results)

            if compare:
                record_size.append(record_size_results)
                file_size.append(file_size_results)

        if compare:
            if len(record_size) != 2:
                logging.error("Comparison skipped, results are not available "
                              "for both files")
                return
            record_comparison = compare_matrices(*record_size)
            file_comparison = compare_matrices(*file_size)
            self.report_comparison(record_comparison, file_comparison)
329
330
class IOzonePlotter(object):
    """
    Plots graphs based on the results of an IOzone run.

    Uses gnuplot to generate the graphs. When gnuplot can not be located, or
    when the results file does not exist, the plotter is flagged as inactive
    and plot_all() does nothing.
    """
    def __init__(self, results_file, output_dir):
        """
        @param results_file: Path to a file with the results of an IOzone run.
        @param output_dir: Directory where data files, gnuplot command files
                and graphs are written. It is created when it does not exist.
        """
        self.active = True
        # Always define these attributes, so they can be inspected even when
        # graph generation ends up disabled.
        self.gnuplot = None
        self.datasource = None
        try:
            self.gnuplot = os_dep.command("gnuplot")
        except Exception:
            logging.error("Command gnuplot not found, disabling graph "
                          "generation")
            self.active = False

        if not os.path.isdir(output_dir):
            os.makedirs(output_dir)
        self.output_dir = output_dir

        if not os.path.isfile(results_file):
            logging.error("Invalid file %s provided, disabling graph "
                          "generation", results_file)
            self.active = False
            self.results_file = None
        else:
            self.results_file = results_file
            self.generate_data_source()


    def _is_result_line(self, fields):
        """
        Tells whether fields are the 15 integer columns of a result line.

        @param fields: List of strings obtained by splitting a line.
        """
        if len(fields) != 15:
            return False
        try:
            for field in fields:
                int(field)
        except ValueError:
            return False
        return True


    def generate_data_source(self):
        """
        Creates data file without headers for gnuplot consumption.

        The lines of the results file made of exactly 15 integer fields are
        copied verbatim to the file '3d-datasource' inside the output
        directory, whose path is stored in self.datasource.
        """
        results_file = open(self.results_file, 'r')
        try:
            self.datasource = os.path.join(self.output_dir, '3d-datasource')
            datasource = open(self.datasource, 'w')
            try:
                for line in results_file:
                    if self._is_result_line(line.split()):
                        datasource.write(line)
            finally:
                datasource.close()
        finally:
            results_file.close()


    def _run_gnuplot(self, commands_path, commands):
        """
        Writes a gnuplot commands file and executes gnuplot on it.

        A failure of the gnuplot command is logged and otherwise ignored.

        @param commands_path: Path of the commands file to create.
        @param commands: String with the gnuplot commands.
        """
        commands_file = open(commands_path, 'w')
        try:
            commands_file.write(commands)
        finally:
            commands_file.close()
        try:
            utils.system("%s %s" % (self.gnuplot, commands_path))
        except error.CmdError:
            logging.error("Problem plotting from commands file %s",
                          commands_path)


    def plot_2d_graphs(self):
        """
        For each one of the throughput parameters, generate a set of gnuplot
        commands that will plot, with lines, the throughput against the first
        column of the '2d-datasource-file' data file found in the output
        directory.
        """
        # NOTE(review): IOzoneAnalyzer.report() fills '2d-datasource-file'
        # with the per record size table while the x axis below is labeled
        # as file size; confirm which one is intended.
        datasource_2d = os.path.join(self.output_dir, '2d-datasource-file')
        # Columns of the 2D data file: 1 is the size, 2 and up the throughput
        # values, in the order of _LABELS[2:].
        for index, label in zip(range(2, len(_LABELS)), _LABELS[2:]):
            commands_path = os.path.join(self.output_dir, '2d-%s.do' % label)
            graph_path = os.path.join(self.output_dir, '2d-%s.png' % label)
            commands = "".join([
                "set title 'Iozone performance: %s'\n" % label,
                "set logscale x\n",
                "set xlabel 'File size (KB)'\n",
                "set ylabel 'Througput (MB/s)'\n",
                "set terminal png small size 450 350\n",
                "set output '%s'\n" % graph_path,
                ("plot '%s' using 1:%s title '%s' with lines \n" %
                 (datasource_2d, index, label)),
            ])
            self._run_gnuplot(commands_path, commands)


    def plot_3d_graphs(self):
        """
        For each one of the throughput parameters, generate a set of gnuplot
        commands that will create a parametric surface with file size vs.
        record size vs. throughput.
        """
        # gnuplot columns are 1-based and the 3D data file holds the raw
        # result lines: column 1 is the file size, column 2 the record size,
        # so the first throughput value (_LABELS[2]) lives in column 3.
        for index, label in zip(range(3, len(_LABELS) + 1), _LABELS[2:]):
            commands_path = os.path.join(self.output_dir, '%s.do' % label)
            graph_path = os.path.join(self.output_dir, '%s.png' % label)
            commands = "".join([
                "set title 'Iozone performance: %s'\n" % label,
                "set grid lt 2 lw 1\n",
                "set surface\n",
                "set parametric\n",
                "set xtics\n",
                "set ytics\n",
                "set logscale x 2\n",
                "set logscale y 2\n",
                "set logscale z\n",
                "set xrange [2.**5:2.**24]\n",
                "set xlabel 'File size (KB)'\n",
                "set ylabel 'Record size (KB)'\n",
                "set zlabel 'Througput (KB/s)'\n",
                # 'set data style' is deprecated gnuplot syntax, 'set style
                # data' is its replacement.
                "set style data lines\n",
                "set dgrid3d 80,80, 3\n",
                "set terminal png small size 900 700\n",
                "set output '%s'\n" % graph_path,
                ("splot '%s' using 1:2:%s title '%s'\n" %
                 (self.datasource, index, label)),
            ])
            self._run_gnuplot(commands_path, commands)


    def plot_all(self):
        """
        Plot all graphs that are to be plotted, provided that we have gnuplot
        and a valid results file.
        """
        if self.active:
            self.plot_2d_graphs()
            self.plot_3d_graphs()
455
456
class AnalyzerLoggingConfig(logging_config.LoggingConfig):
    """
    Logging configuration used when this module is run as a script.
    """
    def configure_logging(self, results_dir=None, verbose=False):
        """
        Configures logging with console output enabled.

        @param results_dir: Accepted for signature compatibility, it is not
                used by this implementation.
        @param verbose: Forwarded unchanged to the parent implementation.
        """
        parent = super(AnalyzerLoggingConfig, self)
        parent.configure_logging(use_console=True, verbose=verbose)
461
462
if __name__ == "__main__":
    parser = optparse.OptionParser("usage: %prog [options] [filenames]")
    options, filenames = parser.parse_args()

    logging_manager.configure_logging(AnalyzerLoggingConfig())

    # One results file is analyzed on its own, two are also compared with
    # each other; any other number of files is a usage error.
    if not filenames or len(filenames) > 2:
        parser.print_help()
        sys.exit(1)

    # Every run gets its own timestamped output directory under the
    # current working directory.
    timestamp = time.strftime('%Y-%m-%d-%H.%M.%S')
    output_dir = os.path.join(os.getcwd(), "iozone-graphs-%s" % timestamp)
    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)

    analyzer = IOzoneAnalyzer(list_files=filenames, output_dir=output_dir)
    analyzer.analyze()
    # Graphs are generated from the first results file only.
    plotter = IOzonePlotter(results_file=filenames[0], output_dir=output_dir)
    plotter.plot_all()
488