#!/usr/bin/python2
"""
Postprocessing module for IOzone. It is able to pick results from an
IOzone run, calculate the geometric mean for all throughput results for
a given file size or record size, and then generate a series of 2D and 3D
graphs. The graph generation functionality depends on gnuplot, and if it
is not present, functionality degrades gracefully.

@copyright: Red Hat 2010
"""
import os, sys, optparse, logging, math, time
import common
from autotest_lib.client.common_lib import logging_config, logging_manager
from autotest_lib.client.common_lib import error
from autotest_lib.client.bin import utils, os_dep


# Column names of one IOzone result row: two size columns followed by the
# throughput columns.
_LABELS = ['file_size', 'record_size', 'write', 'rewrite', 'read', 'reread',
           'randread', 'randwrite', 'bkwdread', 'recordrewrite', 'strideread',
           'fwrite', 'frewrite', 'fread', 'freread']

# Two-line table titles for the throughput columns, in _LABELS[2:] order.
_COLUMN_TITLES = [('INIT', 'WRITE'), ('RE', 'WRITE'), ('', 'READ'),
                  ('RE', 'READ'), ('RANDOM', 'READ'), ('RANDOM', 'WRITE'),
                  ('BACKWD', 'READ'), ('RECRE', 'WRITE'), ('STRIDE', 'READ'),
                  ('F', 'WRITE'), ('FRE', 'WRITE'), ('F', 'READ'),
                  ('FRE', 'READ')]

# Printed width of a throughput column, of the leading size column of the
# drilled tables and of the leading column of the overall summary table.
_COLUMN_WIDTH = 8
_SIZE_COLUMN_WIDTH = 10
_OVERALL_COLUMN_WIDTH = 15


def _table_header(first_top, first_bottom, first_width):
    """
    Build the header lines of a report table.

    @param first_top: Upper title of the first column.
    @param first_bottom: Lower title of the first column.
    @param first_width: Printed width of the first column.
    @return: List with 3 strings: both title rows and a separator line.
    """
    top = first_top.ljust(first_width)
    bottom = first_bottom.ljust(first_width)
    for title_top, title_bottom in _COLUMN_TITLES:
        top += title_top.ljust(_COLUMN_WIDTH)
        bottom += title_bottom.ljust(_COLUMN_WIDTH)
    # Both rows have the same padded length, which is the table width.
    return [top.rstrip(), bottom.rstrip(), "-" * len(top)]


def _row_format(first_column, precision=None):
    """
    Build the % format string used to print one table row.

    @param first_column: Format (or literal text) for the first column.
    @param precision: Maximum number of characters printed for each
            throughput field, or None for no truncation.
    @return: Format string with one field per throughput column.
    """
    if precision is None:
        field = "%%-%ds" % _COLUMN_WIDTH
    else:
        field = "%%-%d.%ds" % (_COLUMN_WIDTH, precision)
    return first_column + field * len(_COLUMN_TITLES)


def _percentage(part, total):
    """
    Return part as a percentage of total, or 0.0 when total is 0.
    """
    if not total:
        return 0.0
    return 100 * part / float(total)


def unique(list):
    """
    Return a list of the elements in list, but without duplicates.

    The order of the returned elements is not specified.

    @param list: List with hashable values. The name shadows the builtin; it
            is kept so that keyword callers keep working.
    @return: List with non duplicate elements, or None if an element is not
            hashable.
    """
    seen = {}
    try:
        for x in list:
            seen[x] = 1
    except TypeError:
        return None
    # Return a real list rather than a dictionary view, so the result can be
    # sorted in place regardless of the Python version.
    return [key for key in seen]


def geometric_mean(values):
    """
    Evaluates the geometric mean for a list of numeric values.

    The mean is computed as the exponential of the average of the logarithms,
    which avoids overflowing on the product of many large values.

    @param values: List with values, each one convertible with int().
    @return: Single value representing the geometric mean for the list values,
            or None if the list is empty or a value can not be converted.
    @raise ValueError: If a value is zero or negative (math.log rejects it).
    @see: http://en.wikipedia.org/wiki/Geometric_mean
    """
    try:
        values = [int(value) for value in values]
    except ValueError:
        return None
    n = len(values)
    if n == 0:
        return None
    return math.exp(sum([math.log(x) for x in values]) / n)


def compare_matrices(matrix1, matrix2, treshold=0.05):
    """
    Compare 2 matrices nxm and return a matrix nxm with comparison data

    Each cell of the result holds:
     * the difference in percent, as a number, for a regression;
     * the difference in percent, as a string prefixed by "+", for an
       improvement;
     * the reference value for an unchanged cell of the first column, and "."
       for any other unchanged cell.

    @param matrix1: Reference Matrix with numeric data
    @param matrix2: Matrix that will be compared
    @param treshold: Any difference bigger than this fraction (0.05 is 5%)
            will be reported.
    @return: Tuple (comparison matrix, number of improvements, number of
            regressions, total number of cells compared).
    @raise ZeroDivisionError: If an element of the reference matrix is 0.
    """
    improvements = 0
    regressions = 0
    same = 0

    new_matrix = []
    for line1, line2 in zip(matrix1, matrix2):
        new_line = []
        for column, (element1, element2) in enumerate(zip(line1, line2)):
            ratio = float(element2) / float(element1)
            # Relative change against the reference value, in percent.
            difference = 100 * ratio - 100
            if ratio < (1 - treshold):
                regressions += 1
                new_line.append(difference)
            elif ratio > (1 + treshold):
                improvements += 1
                new_line.append("+" + str(difference))
            else:
                # Unchanged cells have to be tallied as well: total is the
                # denominator of the percentages reported to the user.
                same += 1
                if column == 0:
                    new_line.append(element1)
                else:
                    new_line.append(".")
        new_matrix.append(new_line)

    total = improvements + regressions + same

    return (new_matrix, improvements, regressions, total)


class IOzoneAnalyzer(object):
    """
    Analyze an unprocessed IOzone file, and generate the following types of
    report:

    * Summary of throughput for all file and record sizes combined
    * Summary of throughput for all file sizes
    * Summary of throughput for all record sizes

    If two files are provided to the analyzer object, a comparison
    between the two runs is made, searching for regressions in performance.
    """
    def __init__(self, list_files, output_dir):
        """
        @param list_files: List of paths of IOzone result files.
        @param output_dir: Directory where output files are stored. It is
                created if it does not exist.
        """
        self.list_files = list_files
        if not os.path.isdir(output_dir):
            os.makedirs(output_dir)
        self.output_dir = output_dir
        logging.info("Results will be stored in %s", output_dir)


    def average_performance(self, results, size=None):
        """
        Flattens a list containing performance results.

        @param results: List of n lists containing data from performance runs.
        @param size: Numerical value of a size (say, file_size) that was used
                to filter the original results list.
        @return: List with the size (when given) followed by the geometric
                mean of each throughput column, divided by 1024 and truncated
                to an integer.
        """
        average_line = []
        if size is not None:
            average_line.append(size)
        # The first two columns are sizes, the remaining ones throughputs.
        for i in range(2, len(_LABELS)):
            average = geometric_mean([line[i] for line in results]) / 1024.0
            average_line.append(int(average))
        return average_line


    def process_results(self, results, label=None):
        """
        Process a list of IOzone results according to label.

        @param results: A list of n x m columns with original iozone results.
        @param label: IOzone column label that we'll use to filter and compute
                geometric mean results, in practical term either 'file_size'
                or 'record_size'. If None, all results are averaged together.
        @return: A list with one line of geometric averages for each value of
                the label column, sorted by that value (ex, averages for each
                file_size), or a single line when no label is given.
        """
        if label is None:
            return [self.average_performance(results)]

        index = _LABELS.index(label)
        # Group the result lines by the value of the label column.
        lines_by_size = {}
        for line in results:
            lines_by_size.setdefault(line[index], []).append(line)
        return [self.average_performance(lines_by_size[size], size)
                for size in sorted(lines_by_size)]


    def parse_file(self, file):
        """
        Parse an IOzone results file.

        Only lines made of exactly one integer per label are kept, anything
        else (headers, command line echo, blank lines) is ignored.

        @param file: File object that will be parsed.
        @return: Matrix containing IOzone results extracted from the file.
        """
        lines = []
        for line in file:
            fields = line.split()
            if len(fields) != len(_LABELS):
                continue
            try:
                lines.append([int(i) for i in fields])
            except ValueError:
                continue
        return lines


    def _log_table_header(self, first_top, first_bottom,
                          first_width=_SIZE_COLUMN_WIDTH):
        """
        Log the title rows and the separator line of a table.
        """
        for header_line in _table_header(first_top, first_bottom, first_width):
            logging.info(header_line)


    def _log_datasource(self, name, rows):
        """
        Log table rows, copying them without headers to an output file.

        @param name: Name of the file, created inside the output directory.
        @param rows: Lines of the table, with a size followed by throughputs.
        """
        path = os.path.join(self.output_dir, name)
        # The handler appends to the file, so get rid of previous contents.
        if os.path.isfile(path):
            os.unlink(path)
        # While the handler is attached to the root logger, every message
        # logged is also written, bare, to the file.
        logger = logging.getLogger()
        handler = logging.FileHandler(path)
        handler.setFormatter(logging.Formatter("%(message)s"))
        logger.addHandler(handler)
        try:
            row_format = _row_format("%%-%ds" % _SIZE_COLUMN_WIDTH)
            for result_line in rows:
                logging.info(row_format % tuple(result_line))
        finally:
            logger.removeHandler(handler)
            # Removing the handler does not release its file.
            handler.close()


    def report(self, overall_results, record_size_results, file_size_results):
        """
        Generates analysis data for IOZone run.

        Generates a report to both logs (where it goes with nice headers) and
        output files for further processing (graph generation).

        @param overall_results: Matrix with 1 line containing the averages of
                the IOzone results for all file and record sizes.
        @param record_size_results: Matrix containing one line of averages
                for each record size tested. Also written to the file
                '2d-datasource-file' of the output directory.
        @param file_size_results: Matrix containing one line of averages for
                each file size tested. Also written to the file
                '2d-datasource-record' of the output directory.
        """
        logging.info("")
        logging.info("TABLE: SUMMARY of ALL FILE and RECORD SIZES Results in MB/sec")
        logging.info("")
        self._log_table_header("FILE & RECORD", "SIZES (KB)",
                               _OVERALL_COLUMN_WIDTH)
        overall_format = _row_format("ALL".ljust(_OVERALL_COLUMN_WIDTH))
        for result_line in overall_results:
            logging.info(overall_format % tuple(result_line))
        logging.info("")

        logging.info("DRILLED DATA:")

        logging.info("")
        logging.info("TABLE: RECORD Size against all FILE Sizes Results in MB/sec")
        logging.info("")
        self._log_table_header("RECORD", "SIZE (KB)")
        self._log_datasource('2d-datasource-file', record_size_results)

        logging.info("")

        logging.info("")
        logging.info("TABLE: FILE Size against all RECORD Sizes Results in MB/sec")
        logging.info("")
        self._log_table_header("FILE", "SIZE (KB)")
        self._log_datasource('2d-datasource-record', file_size_results)

        logging.info("")


    def _log_comparison(self, title, first_title, comparison):
        """
        Log one table of differences followed by its summary line.

        @param title: Title of the table.
        @param first_title: Upper title of the first column of the table.
        @param comparison: Tuple with 4 elements, as returned by
                compare_matrices.
        """
        (matrix, improvements, regressions, total) = comparison
        logging.info("")
        logging.info(title)
        logging.info("")
        self._log_table_header(first_title, "SIZE (KB)")
        # Differences are floats with many digits, print at most 6 characters.
        row_format = _row_format("%%-%ds" % _SIZE_COLUMN_WIDTH, precision=6)
        for result_line in matrix:
            logging.info(row_format % tuple(result_line))
        logging.info("REGRESSIONS: %d (%.2f%%) Improvements: %d (%.2f%%)",
                     regressions, _percentage(regressions, total),
                     improvements, _percentage(improvements, total))
        logging.info("")


    def report_comparison(self, record, file):
        """
        Generates comparison data for 2 IOZone runs.

        It outputs, for the results by record size and for the results by
        file size, a table with the differences found by compare_matrices and
        the number of regressions and improvements.

        @param record: Tuple with 4 elements containing results for record size.
        @param file: Tuple with 4 elements containing results for file size.
        """
        logging.info("ANALYSIS of DRILLED DATA:")
        self._log_comparison(
                "TABLE: RECsize Difference between runs Results are % DIFF",
                "RECORD", record)
        self._log_comparison(
                "TABLE: FILEsize Difference between runs Results are % DIFF",
                "FILE", file)


    def analyze(self):
        """
        Analyzes and eventually compares sets of IOzone data.

        A report is generated for each file holding results. When there are
        exactly two of them, the second run is compared against the first.
        """
        record_size = []
        file_size = []
        for path in self.list_files:
            with open(path, 'r') as results_file:
                logging.info('FILE: %s', path)
                results = self.parse_file(results_file)

            if not results:
                # Averages can not be computed out of an empty set.
                logging.error("No IOzone results found in %s, skipping it",
                              path)
                continue

            overall_results = self.process_results(results)
            record_size_results = self.process_results(results, 'record_size')
            file_size_results = self.process_results(results, 'file_size')
            self.report(overall_results, record_size_results, file_size_results)

            record_size.append(record_size_results)
            file_size.append(file_size_results)

        if len(record_size) == 2:
            record_comparison = compare_matrices(*record_size)
            file_comparison = compare_matrices(*file_size)
            self.report_comparison(record_comparison, file_comparison)


class IOzonePlotter(object):
    """
    Plots graphs based on the results of an IOzone run.

    Uses gnuplot to generate the graphs. If gnuplot can not be found, or if
    the results file does not exist, graph generation is disabled.
    """
    def __init__(self, results_file, output_dir):
        """
        @param results_file: Path to the file with the output of IOzone.
        @param output_dir: Directory where command files, data sources and
                graphs are stored. It is created if it does not exist.
        """
        self.active = True
        self.gnuplot = None
        self.datasource = None
        try:
            self.gnuplot = os_dep.command("gnuplot")
        except Exception:
            # Whatever prevented us from finding gnuplot, we can't plot.
            logging.error("Command gnuplot not found, disabling graph "
                          "generation")
            self.active = False

        if not os.path.isdir(output_dir):
            os.makedirs(output_dir)
        self.output_dir = output_dir

        if not os.path.isfile(results_file):
            logging.error("Invalid file %s provided, disabling graph "
                          "generation", results_file)
            self.active = False
            self.results_file = None
        else:
            self.results_file = results_file
            self.generate_data_source()


    def generate_data_source(self):
        """
        Creates data file without headers for gnuplot consumption.

        Lines of the results file made of exactly one integer per label are
        copied, untouched, to the file '3d-datasource' of the output
        directory.
        """
        self.datasource = os.path.join(self.output_dir, '3d-datasource')
        with open(self.results_file, 'r') as results_file:
            with open(self.datasource, 'w') as datasource:
                for line in results_file:
                    fields = line.split()
                    if len(fields) != len(_LABELS):
                        continue
                    try:
                        # Conversion is only a check that the line holds data.
                        [int(i) for i in fields]
                    except ValueError:
                        continue
                    datasource.write(line)


    def _run_gnuplot(self, commands_path, commands):
        """
        Write gnuplot commands to a file and make gnuplot execute it.

        A failure of gnuplot is logged and otherwise ignored.

        @param commands_path: Path of the command file to be written.
        @param commands: List of strings with the gnuplot commands.
        """
        with open(commands_path, 'w') as commands_file:
            commands_file.write("".join(commands))
        try:
            utils.system("%s %s" % (self.gnuplot, commands_path))
        except error.CmdError:
            logging.error("Problem plotting from commands file %s",
                          commands_path)


    def plot_2d_graphs(self):
        """
        For each one of the throughput parameters, generate a set of gnuplot
        commands that will plot the throughput against the first column of
        the file '2d-datasource-file', then run gnuplot on them.
        """
        # NOTE(review): the x axis is labelled 'File size (KB)', while
        # IOzoneAnalyzer.report writes the table of averages for each record
        # size to '2d-datasource-file' - confirm which one is intended.
        datasource_2d = os.path.join(self.output_dir, '2d-datasource-file')
        # gnuplot columns are numbered from 1: column 1 has the size and
        # columns 2 to 14 the throughputs.
        for index, label in zip(range(2, len(_LABELS)), _LABELS[2:]):
            commands_path = os.path.join(self.output_dir, '2d-%s.do' % label)
            output_path = os.path.join(self.output_dir, '2d-%s.png' % label)
            commands = [
                "set title 'Iozone performance: %s'\n" % label,
                "set logscale x\n",
                "set xlabel 'File size (KB)'\n",
                "set ylabel 'Througput (MB/s)'\n",
                "set terminal png small size 450 350\n",
                "set output '%s'\n" % output_path,
                ("plot '%s' using 1:%s title '%s' with lines \n" %
                 (datasource_2d, index, label)),
            ]
            self._run_gnuplot(commands_path, commands)


    def plot_3d_graphs(self):
        """
        For each one of the throughput parameters, generate a set of gnuplot
        commands that will create a parametric surface with file size vs.
        record size vs. throughput, then run gnuplot on them.
        """
        # gnuplot columns are numbered from 1: columns 1 and 2 have the file
        # and record sizes, columns 3 to 15 the throughputs.
        for index, label in zip(range(3, len(_LABELS) + 1), _LABELS[2:]):
            commands_path = os.path.join(self.output_dir, '%s.do' % label)
            output_path = os.path.join(self.output_dir, '%s.png' % label)
            commands = [
                "set title 'Iozone performance: %s'\n" % label,
                "set grid lt 2 lw 1\n",
                "set surface\n",
                "set parametric\n",
                "set xtics\n",
                "set ytics\n",
                "set logscale x 2\n",
                "set logscale y 2\n",
                "set logscale z\n",
                "set xrange [2.**5:2.**24]\n",
                "set xlabel 'File size (KB)'\n",
                "set ylabel 'Record size (KB)'\n",
                "set zlabel 'Througput (KB/s)'\n",
                # 'set data style' is deprecated gnuplot syntax.
                "set style data lines\n",
                "set dgrid3d 80,80, 3\n",
                "set terminal png small size 900 700\n",
                "set output '%s'\n" % output_path,
                ("splot '%s' using 1:2:%s title '%s'\n" %
                 (self.datasource, index, label)),
            ]
            self._run_gnuplot(commands_path, commands)


    def plot_all(self):
        """
        Plot all graphs that are to be plotted, provided that we have gnuplot.
        """
        if self.active:
            self.plot_2d_graphs()
            self.plot_3d_graphs()


class AnalyzerLoggingConfig(logging_config.LoggingConfig):
    """
    Logging configuration of the analyzer, with output to the console.
    """
    def configure_logging(self, results_dir=None, verbose=False):
        """
        @param results_dir: Not used.
        @param verbose: Passed on to the base class.
        """
        super(AnalyzerLoggingConfig, self).configure_logging(use_console=True,
                                                             verbose=verbose)


def main():
    """
    Analyze the one or two IOzone result files given on the command line and
    plot the graphs for the first one, in a new directory under the current
    one.
    """
    parser = optparse.OptionParser("usage: %prog [options] [filenames]")
    options, args = parser.parse_args()

    logging_manager.configure_logging(AnalyzerLoggingConfig())

    if not args or len(args) > 2:
        parser.print_help()
        sys.exit(1)
    filenames = args

    o = os.path.join(os.getcwd(),
                     "iozone-graphs-%s" % time.strftime('%Y-%m-%d-%H.%M.%S'))
    if not os.path.isdir(o):
        os.makedirs(o)

    a = IOzoneAnalyzer(list_files=filenames, output_dir=o)
    a.analyze()
    p = IOzonePlotter(results_file=filenames[0], output_dir=o)
    p.plot_all()


if __name__ == "__main__":
    main()