#!/usr/bin/python3
"""Read intermediate tensors generated by DumpAllTensors activity.

Tools for reading/parsing intermediate tensors.
"""

import argparse
import datetime
import json
import math
import multiprocessing
import os

import matplotlib
# The non-interactive backend must be selected before pyplot is imported.
matplotlib.use('Agg')
import matplotlib.animation as animation
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf

# NOTE: kept only so the module namespace stays the same for existing users of
# this file; the code below no longer relies on names leaked by this import
# (`math` and `subplots` are now referenced explicitly).
from matplotlib.pylab import *
from tqdm import tqdm

# Enable large animation size
matplotlib.rcParams['animation.embed_limit'] = 2**128
# Enable tensor.numpy()
tf.compat.v1.enable_eager_execution()


############################ Helper Functions ############################
def reshape_to_matrix(array):
    """Reshape a 1-D array into a near-square matrix padded with np.nan.

    The result has shape (width, height) with width = ceil(sqrt(n)) and
    height = ceil(n / width); the trailing unused cells are np.nan.
    An empty input yields an empty (0, 0) matrix instead of dividing by zero.
    """
    array = array.astype(float)
    if len(array) == 0:
        return array.reshape(0, 0)
    width = math.ceil(len(array) ** 0.5)
    height = math.ceil(len(array) / width)
    padded = np.pad(array=array,
                    pad_width=(0, width * height - len(array)),
                    mode='constant',
                    constant_values=np.nan)
    return padded.reshape(width, -1)


def save_ani_to_video(ani, save_video_path, video_fps=5):
    """Encode the animation `ani` to `save_video_path` with the ffmpeg writer."""
    writer_cls = animation.writers['ffmpeg']
    writer = writer_cls(fps=video_fps)
    # Save the movie
    ani.save(save_video_path, writer=writer, dpi=250)


def save_ani_to_html(ani, save_html_path):
    """Write the JS/HTML representation of the animation to `save_html_path`."""
    with open(save_html_path, 'w') as f:
        f.write(ani.to_jshtml())


############################ ModelMetaDataManager ############################
class ModelMetaDataManager(object):
    """Maps model name in nnapi to its graph architecture with lazy initialization.

    # Arguments
      android_build_top: the root directory of android source tree
      dump_dir: directory containing intermediate tensors pulled from device
      tflite_model_json_dir: directory containing intermediate json output of
        model visualization tool (third_party/tensorflow/lite/tools:visualize).
        The json output path from the tool is always /tmp.
    """

    ############################ ModelMetaData ############################
    class ModelMetaData(object):
        """Store graph information of a model.

        # Arguments
          tflite_model_json_path: path of the json file describing the model.
            NOTE(review): the default '/tmp' is kept for compatibility, but
            the value is passed straight to open(), so callers need to supply
            a file path.
        """

        def __init__(self, tflite_model_json_path='/tmp'):
            with open(tflite_model_json_path, 'rb') as f:
                model_json = json.load(f)
            self.operators = model_json['subgraphs'][0]['operators']
            self.operator_codes = [item['builtin_code']
                                   for item in model_json['operator_codes']]
            self.output_meta_data = []
            self.load_output_meta_data()

        def load_output_meta_data(self):
            """Append one metadata dict per operator to `output_meta_data`."""
            for operator in self.operators:
                # Each operator can only have one output
                assert len(operator['outputs']) == 1
                builtin_options = operator.get('builtin_options', {})
                self.output_meta_data.append({
                    'output_tensor_index': operator['outputs'][0],
                    'fused_activation_function':
                        builtin_options.get('fused_activation_function', ''),
                    'operator_code':
                        self.operator_codes[operator['opcode_index']],
                })

    def __init__(self, android_build_top, dump_dir, tflite_model_json_dir='/tmp'):
        # key: nnapi model name, value: ModelMetaData
        self.models = dict()
        self.ANDROID_BUILD_TOP = android_build_top + "/"
        self.TFLITE_MODEL_JSON_DIR = tflite_model_json_dir + "/"
        self.DUMP_DIR = dump_dir + "/"
        self.nnapi_to_tflite_name = dict()
        self.tflite_to_nnapi_name = dict()
        self.__load_mobilenet_topk_aosp()
        self.model_names = sorted(os.listdir(dump_dir))

    def __load_mobilenet_topk_aosp(self):
        """Load information about tflite and nnapi model names."""
        json_path = os.path.join(
            self.ANDROID_BUILD_TOP,
            'test/mlts/models/assets/models_list/mobilenet_topk_aosp.json')
        with open(json_path, 'rb') as f:
            topk_aosp = json.load(f)
        for model in topk_aosp['models']:
            self.nnapi_to_tflite_name[model['name']] = model['modelFile']
            self.tflite_to_nnapi_name[model['modelFile']] = model['name']

    def __get_model_json_path(self, tflite_model_name):
        """Return tflite model json path."""
        return os.path.join(self.TFLITE_MODEL_JSON_DIR,
                            '{}.json'.format(tflite_model_name))

    def __load_model(self, tflite_model_name):
        """Initialize a ModelMetaData for this model."""
        model = self.ModelMetaData(self.__get_model_json_path(tflite_model_name))
        nnapi_model_name = self.model_name_tflite_to_nnapi(tflite_model_name)
        self.models[nnapi_model_name] = model

    def model_name_nnapi_to_tflite(self, nnapi_model_name):
        """Map an nnapi model name to its tflite name (identity if unknown)."""
        return self.nnapi_to_tflite_name.get(nnapi_model_name, nnapi_model_name)

    def model_name_tflite_to_nnapi(self, tflite_model_name):
        """Map a tflite model name to its nnapi name (identity if unknown)."""
        return self.tflite_to_nnapi_name.get(tflite_model_name, tflite_model_name)

    def get_model_meta_data(self, nnapi_model_name):
        """Retrieve the ModelMetaData with lazy initialization."""
        tflite_model_name = self.model_name_nnapi_to_tflite(nnapi_model_name)
        if nnapi_model_name not in self.models:
            self.__load_model(tflite_model_name)
        return self.models[nnapi_model_name]

    def generate_animation_html(self, output_file_path, model_names=None, heatmap=True):
        """Generate a html file containing the hist and heatmap animation of all models.

        # Arguments
          output_file_path: path of the html file to write
          model_names: models to process; defaults to every model in dump_dir
          heatmap: also emit the heatmap animation when True
        """
        model_names = self.model_names if model_names is None else model_names
        html_parts = []
        for model_name in tqdm(model_names):
            print(datetime.datetime.now(), 'Processing', model_name)
            html_parts.append('<h3>{}</h3>'.format(model_name))
            model_data = ModelData(nnapi_model_name=model_name, manager=self)
            html_parts.append(model_data.gen_error_hist_animation().to_jshtml())
            if heatmap:
                html_parts.append(model_data.gen_heatmap_animation().to_jshtml())
        with open(output_file_path, 'w') as f:
            f.write(''.join(html_parts))

    def generate_hist_animation_html(self, model_name):
        """Generate a html hist animation for a model, used for multiprocessing.

        The result is stored in `self.return_dict` under '<model_name>-hist'.
        """
        html_data = '<h3>{}</h3>'.format(model_name)
        model_data = ModelData(nnapi_model_name=model_name, manager=self)
        ani = model_data.gen_error_hist_animation()
        html_data += ani.to_jshtml()
        print(datetime.datetime.now(), "Done histogram for", model_name)
        self.return_dict[model_name + "-hist"] = html_data

    def generate_heatmap_animation_html(self, model_name):
        """Generate a html heatmap animation for a model, used for multiprocessing.

        The result is stored in `self.return_dict` under '<model_name>-heatmap'.
        """
        model_data = ModelData(nnapi_model_name=model_name, manager=self)
        ani = model_data.gen_heatmap_animation()
        html_data = ani.to_jshtml()
        print(datetime.datetime.now(), "Done heatmap for", model_name)
        self.return_dict[model_name + "-heatmap"] = html_data

    def multiprocessing_generate_animation_html(self, output_file_path,
                                                model_names=None, heatmap=True):
        """
        Generate a html file containing the hist and heatmap animation of all models
        with multiple process.

        One process is started per (model, animation kind). Heatmap jobs are
        only started when `heatmap` is True. A job that produced no result is
        reported and skipped so the output of the other jobs is still written.
        """
        model_names = self.model_names if model_names is None else model_names
        manager = multiprocessing.Manager()
        self.return_dict = manager.dict()

        targets = [('-hist', self.generate_hist_animation_html)]
        if heatmap:
            targets.append(('-heatmap', self.generate_heatmap_animation_html))

        jobs = []
        for model_name in model_names:
            for _, target_func in targets:
                p = multiprocessing.Process(target=target_func, args=(model_name,))
                jobs.append(p)
                p.start()
        # wait for completion
        for job in jobs:
            job.join()

        results = dict(self.return_dict)
        with open(output_file_path, 'w') as f:
            for model_name in model_names:
                for suffix, _ in targets:
                    key = model_name + suffix
                    if key not in results:
                        print('WARNING: no animation was produced for', key)
                        continue
                    f.write(results[key])


############################ TensorDict ############################
class TensorDict(dict):
    """A class to store cpu and nnapi tensors.

    # Arguments
      model_dir: directory containing intermediate tensors pulled from device
    """

    def __init__(self, model_dir):
        super().__init__()
        for useNNAPIDir in ['cpu', 'nnapi']:
            dir_path = os.path.join(model_dir, useNNAPIDir)
            self[useNNAPIDir] = self.read_tensors_from_dir(dir_path)
        self.tensor_sanity_check()
        self.max_absolute_diff, self.min_absolute_diff = 0.0, 0.0
        self.max_relative_diff, self.min_relative_diff = 0.0, 0.0
        self.layers = sorted(self['cpu'].keys())
        self.calc_range()

    def bytes_to_numpy_tensor(self, file_path):
        """Load bytes output by DumpIntermediateTensor into a numpy tensor.

        The bytes are decoded as int8 when the path contains 'quant' or
        '8bit', and as float32 otherwise.
        """
        if 'quant' in file_path or '8bit' in file_path:
            tensor_type = tf.int8
        else:
            tensor_type = tf.float32
        with open(file_path, mode='rb') as f:
            tensor_bytes = f.read()
        tensor = tf.io.decode_raw(input_bytes=tensor_bytes, out_type=tensor_type)
        values = tensor.numpy()
        # isfinite catches both nan and +/-inf; a summed-nan test misses
        # tensors that only contain infinities of one sign.
        if not np.isfinite(values).all():
            print('WARNING: tensor contains inf or nan')
        return values

    def read_tensors_from_dir(self, dir_path):
        """Return {file name: numpy tensor} for every file in `dir_path`."""
        tensor_dict = dict()
        for tensor_file in os.listdir(dir_path):
            tensor_dict[tensor_file] = self.bytes_to_numpy_tensor(
                os.path.join(dir_path, tensor_file))
        return tensor_dict

    def tensor_sanity_check(self):
        """Assert that the cpu and nnapi dumps contain the same tensor names."""
        # Make sure the cpu tensors and nnapi tensors have the same outputs
        assert set(self['cpu'].keys()) == set(self['nnapi'].keys())
        print('Tensor sanity check passed')

    def calc_range(self):
        """Compute the min/max absolute diff over all layers and `absolute_range`."""
        for layer in self.layers:
            diff = self.calc_diff(layer, relative_error=False)
            # update absolute max, min
            self.max_absolute_diff = max(self.max_absolute_diff, np.max(diff))
            self.min_absolute_diff = min(self.min_absolute_diff, np.min(diff))
        self.absolute_range = max(abs(self.min_absolute_diff),
                                  abs(self.max_absolute_diff))

    def calc_diff(self, layer, relative_error=True):
        """Return the element-wise cpu - nnapi difference for `layer`.

        # Arguments
          layer: key of the tensor in both the 'cpu' and 'nnapi' dicts
          relative_error: when True, divide the difference by
            max(|cpu|, |nnapi|) (0 where that maximum is 0) so the result
            lies in [-1, 1]; when False, return the raw difference.
        """
        cpu_tensor = self['cpu'][layer]
        nnapi_tensor = self['nnapi'][layer]
        assert cpu_tensor.shape == nnapi_tensor.shape
        # Widen integer tensors first: subtracting or taking abs() in int8
        # silently wraps around (e.g. 127 - (-128), abs(-128)).
        if np.issubdtype(cpu_tensor.dtype, np.integer):
            cpu_tensor = cpu_tensor.astype(np.int64)
        if np.issubdtype(nnapi_tensor.dtype, np.integer):
            nnapi_tensor = nnapi_tensor.astype(np.int64)
        diff = cpu_tensor - nnapi_tensor
        if not relative_error:
            return diff
        diff = diff.astype(float)
        # Divide by max so the relative error range is conveniently [-1, 1]
        max_cpu_nnapi_tensor = np.maximum(np.abs(cpu_tensor),
                                          np.abs(nnapi_tensor)).astype(float)
        relative_diff = np.divide(diff, max_cpu_nnapi_tensor,
                                  out=np.zeros_like(diff),
                                  where=max_cpu_nnapi_tensor > 0)
        return relative_diff

    def gen_tensor_diff_stats(self, relative_error=True, return_df=True, plot_diff=False):
        """Compute per-layer min/max/mean/median of the diff.

        Returns a pandas DataFrame when `return_df` is True, otherwise None.
        When `plot_diff` is True a histogram is drawn for every layer.
        """
        stats = []
        for layer in self.layers:
            diff = self.calc_diff(layer, relative_error)
            if plot_diff:
                self.plot_tensor_diff(diff)
            if return_df:
                stats.append({
                    'layer': layer,
                    'min': np.min(diff),
                    'max': np.max(diff),
                    'mean': np.mean(diff),
                    'median': np.median(diff),
                })
        if return_df:
            return pd.DataFrame(stats)

    @staticmethod
    def plot_tensor_diff(diff):
        """Draw a log-scaled 50-bin histogram of `diff` on a new figure."""
        plt.figure()
        plt.hist(diff, bins=50, log=True)
        plt.plot()


############################ Model Data ############################
class ModelData(object):
    """A class to store all relevant information of a model.

    # Arguments
      nnapi_model_name: the name of the model
      manager: ModelMetaDataManager
      seq_limit: caps the number of animated layers at
        seq_limit * (number of operators); a falsy value disables the cap
    """

    def __init__(self, nnapi_model_name, manager, seq_limit=10):
        self.nnapi_model_name = nnapi_model_name
        self.manager = manager
        self.model_dir = self.get_target_model_dir(manager.DUMP_DIR,
                                                   nnapi_model_name)
        self.tensor_dict = TensorDict(self.model_dir)
        self.mmd = manager.get_model_meta_data(nnapi_model_name)
        self.stats = self.tensor_dict.gen_tensor_diff_stats(relative_error=True,
                                                            return_df=True)
        self.layers = sorted(self.tensor_dict['cpu'].keys())
        self.cmap = sns.diverging_palette(220, 20, sep=20, as_cmap=True)
        self.seq_limit = seq_limit

    def get_target_model_dir(self, dump_dir, target_model_name):
        """Return the model directory path, always ending with a separator."""
        return os.path.join(dump_dir, target_model_name, '')

    def __sns_distplot(self, layer, bins, ax, value_range, relative_error):
        sns.distplot(self.tensor_dict.calc_diff(layer, relative_error=relative_error),
                     bins=bins,
                     hist_kws={"range": value_range, "log": True}, ax=ax, kde=False)

    def __plt_hist(self, layer, bins, ax, value_range, relative_error):
        ax.hist(self.tensor_dict.calc_diff(layer, relative_error=relative_error),
                bins=bins, range=value_range, log=True)

    def __get_layer_num(self):
        """Number of animation frames, capped by `seq_limit` when it is set."""
        if self.seq_limit:
            return min(len(self.layers),
                       len(self.mmd.output_meta_data) * self.seq_limit)
        return len(self.layers)

    def update_hist_data(self, i, fig, ax1, ax2, bins=50, plot_library='sns'):
        """Animation callback: redraw both error histograms for layer `i`.

        `ax1` shows the relative error, `ax2` the absolute error.
        `plot_library` selects 'matplotlib' or (any other value) seaborn.
        """
        # Use % because there may be multiple testing samples
        meta = self.mmd.output_meta_data
        operation = meta[i % len(meta)]['operator_code']
        layer = self.layers[i]
        fig.suptitle('{} | {}\n{}'
                     .format(self.nnapi_model_name, layer, operation),
                     fontsize='x-large')
        for ax in (ax1, ax2):
            ax.clear()
        ax1.set_title('Relative Error')
        ax2.set_title('Absolute Error')
        absolute_range = self.tensor_dict.absolute_range

        # Determine underlying plotting library
        hist_func = self.__plt_hist if plot_library == 'matplotlib' else self.__sns_distplot
        hist_func(layer=layer, bins=bins, ax=ax1,
                  value_range=(-1, 1), relative_error=True)
        hist_func(layer=layer, bins=bins, ax=ax2,
                  value_range=(-absolute_range, absolute_range), relative_error=False)

    def gen_error_hist_animation(self, save_video_path=None, video_fps=10):
        """Build the per-layer error histogram animation.

        When `save_video_path` is given the animation is also saved as a
        video. Returns the FuncAnimation.
        """
        fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 9))
        ani = animation.FuncAnimation(fig, self.update_hist_data, self.__get_layer_num(),
                                      fargs=(fig, ax1, ax2),
                                      interval=200, repeat=False)
        if save_video_path:
            save_ani_to_video(ani, save_video_path, video_fps)
        # close before return to avoid dangling plot
        plt.close(fig)
        return ani

    def __sns_heatmap(self, data, ax, cbar_ax, **kwargs):
        return sns.heatmap(data, cmap=self.cmap, cbar=True, ax=ax, cbar_ax=cbar_ax,
                           cbar_kws={"orientation": "horizontal"}, center=0, **kwargs)

    def update_heatmap_data(self, i, fig, axs):
        """Animation callback: redraw diff/cpu/nnapi heatmaps for layer `i`.

        `axs[0]` holds the three heatmap axes, `axs[1]` their colorbar axes.
        """
        # Use % because there may be multiple testing samples
        meta = self.mmd.output_meta_data
        operation = meta[i % len(meta)]['operator_code']
        layer = self.layers[i]
        fig.suptitle('{} | {}\n{}\n'
                     .format(self.nnapi_model_name, layer, operation),
                     fontsize='x-large')
        # Clear all the axs and redraw
        # It's important to clear the colorbars as well to avoid duplicate colorbars
        for ax_tuple in axs:
            for ax in ax_tuple:
                ax.clear()
        axs[0][0].set_title('Diff')
        axs[0][1].set_title('CPU Tensor')
        axs[0][2].set_title('NNAPI Tensor')

        reshaped_diff = reshape_to_matrix(
            self.tensor_dict.calc_diff(layer, relative_error=False))
        reshaped_cpu = reshape_to_matrix(self.tensor_dict['cpu'][layer])
        reshaped_nnapi = reshape_to_matrix(self.tensor_dict['nnapi'][layer])
        absolute_range = self.tensor_dict.absolute_range
        self.__sns_heatmap(data=reshaped_diff, ax=axs[0][0], cbar_ax=axs[1][0],
                           vmin=-absolute_range, vmax=absolute_range)
        self.__sns_heatmap(data=reshaped_cpu, ax=axs[0][1], cbar_ax=axs[1][1])
        self.__sns_heatmap(data=reshaped_nnapi, ax=axs[0][2], cbar_ax=axs[1][2])

    def gen_heatmap_animation(self, save_video_path=None, video_fps=10, figsize=(13, 6)):
        """Build the per-layer heatmap animation (diff, cpu, nnapi).

        When `save_video_path` is given the animation is also saved as a
        video. Returns the FuncAnimation.
        """
        fig = plt.figure(constrained_layout=True, figsize=figsize)
        widths = [1, 1, 1]
        heights = [7, 1]
        spec = fig.add_gridspec(ncols=3, nrows=2, width_ratios=widths,
                                height_ratios=heights)
        # Row 0: heatmaps, row 1: their horizontal colorbars.
        axs = [[fig.add_subplot(spec[row, col]) for col in range(3)]
               for row in range(2)]

        ani = animation.FuncAnimation(fig, self.update_heatmap_data, self.__get_layer_num(),
                                      fargs=(fig, axs),
                                      interval=200, repeat=False)
        if save_video_path:
            save_ani_to_video(ani, save_video_path, video_fps)
        # close before return to avoid dangling plot
        plt.close(fig)
        return ani

    def plot_error_heatmap(self, target_layer, vmin=None, vmax=None):
        """Plot the cpu - nnapi diff heatmap for a given layer.

        `vmin`/`vmax` are forwarded to seaborn as the colormap limits; None
        lets seaborn pick them from the data.
        """
        # calc_diff widens integer tensors, so the diff cannot wrap around.
        target_diff = self.tensor_dict.calc_diff(target_layer, relative_error=False)
        reshaped_target_diff = reshape_to_matrix(target_diff)
        fig, ax = plt.subplots(figsize=(9, 9))
        ax.set_title('Heat Map of Error between CPU and NNAPI')
        sns.heatmap(reshaped_target_diff,
                    cmap=self.cmap,
                    mask=np.isnan(reshaped_target_diff),
                    center=0,
                    vmin=vmin,
                    vmax=vmax,
                    ax=ax)
        plt.show()


############################ ModelDataComparison ############################
class ModelDataComparison:
    """A class to store and compare multiple ModelData.

    # Arguments
      dump_dir_list: dump directories to compare; one ModelData is loaded
        from each
      android_build_top: the root directory of android source tree
      tflite_model_json_dir: directory containing the model json files
      model_name: nnapi name of the model to compare

    The loaded data is available as `model_data_list` and `manager_list`.
    """

    def __init__(self, dump_dir_list, android_build_top, tflite_model_json_dir, model_name):
        self.dump_dir_list = dump_dir_list
        self.android_build_top = android_build_top
        self.tflite_model_json_dir = tflite_model_json_dir
        self.set_model_name(model_name)

    def set_model_name(self, model_name):
        """Set model to be compared and load/reload all model data."""
        self.model_name = model_name
        self.__load_data()

    def __load_data(self):
        """Load a manager and a ModelData for every dump directory."""
        self.manager_list = []
        self.model_data_list = []
        for dump_dir in self.dump_dir_list:
            manager = ModelMetaDataManager(self.android_build_top,
                                           dump_dir,
                                           tflite_model_json_dir=self.tflite_model_json_dir)
            model_data = ModelData(nnapi_model_name=self.model_name, manager=manager)
            self.manager_list.append(manager)
            self.model_data_list.append(model_data)
        self.sanity_check()

    def sanity_check(self):
        """Check the loaded data and publish `layers` and `mmd`.

        Asserts that
          1) there is at least one model to be compared
          2) every other model has the same intermediate layers as the first
        """
        assert len(self.model_data_list) >= 1
        sample_model_data = self.model_data_list[0]
        sample_layers = set(sample_model_data.tensor_dict['cpu'].keys())
        for other in self.model_data_list[1:]:
            assert sample_layers == set(other.tensor_dict['nnapi'].keys())
        print('Sanity Check Passed')
        self.layers = sample_model_data.layers
        self.mmd = sample_model_data.mmd

    def update_hist_comparison_data(self, i, fig, axs, bins=50):
        """Animation callback: redraw the comparison plots for layer `i`.

        `axs[0][0]` receives the overlaid diff histograms, `axs[1]` one diff
        heatmap per model and `axs[2]` the matching colorbars.
        """
        # Use % because there may be multiple testing samples
        sample_model_data = self.model_data_list[0]
        meta = self.mmd.output_meta_data
        operation = meta[i % len(meta)]['operator_code']
        layer = self.layers[i]
        fig.suptitle('{} | {}\n{}'
                     .format(sample_model_data.nnapi_model_name, layer, operation),
                     fontsize='x-large')
        for row in axs:
            for ax in row:
                ax.clear()

        hist_ax = axs[0][0]
        hist_ax.set_title('Diff Histogram')
        # Last path component of each dump dir, with or without trailing '/'.
        labels = [os.path.basename(os.path.normpath(dump_dir))
                  for dump_dir in self.dump_dir_list]
        cmap = sns.diverging_palette(220, 20, sep=20, as_cmap=True)
        for col, heatmap_ax in enumerate(axs[1]):
            model_data = self.model_data_list[col]
            heatmap_ax.set_title(labels[col])
            diff = model_data.tensor_dict.calc_diff(layer, relative_error=False)
            sns.heatmap(reshape_to_matrix(diff), cmap=cmap, cbar=True,
                        ax=heatmap_ax, cbar_ax=axs[2][col],
                        cbar_kws={"orientation": "horizontal"}, center=0)
            sns.distplot(diff, bins=bins,
                         hist_kws={"log": True}, ax=hist_ax, kde=False)
        hist_ax.legend(labels)

    def gen_error_hist_comparison_animation(self, save_video_path=None, video_fps=10):
        """Build the animation comparing the diff of every loaded model.

        When `save_video_path` is given the animation is also saved as a
        video. Returns the FuncAnimation.
        """
        N = len(self.model_data_list)
        widths = [1] * N
        heights = [N * 0.7, 1, 0.2]
        fig = plt.figure(figsize=(5 * N, 4 * N))
        gs = fig.add_gridspec(3, N, width_ratios=widths, height_ratios=heights)
        axs = [[], [], []]
        # One histogram axis spanning the whole first row.
        axs[0].append(fig.add_subplot(gs[0, :]))
        for col in range(N):
            # heatmap
            axs[1].append(fig.add_subplot(gs[1, col]))
            # colorbar
            axs[2].append(fig.add_subplot(gs[2, col]))
        ani = animation.FuncAnimation(fig, self.update_hist_comparison_data,
                                      len(self.layers),
                                      fargs=(fig, axs),
                                      interval=200, repeat=False)
        if save_video_path:
            save_ani_to_video(ani, save_video_path, video_fps)
        # close before return to avoid dangling plot
        plt.close(fig)
        return ani


############################ NumpyEncoder ############################
class NumpyEncoder(json.JSONEncoder):
    """Enable numpy array serialization in a dictionary.

    Usage:
      a = np.array([[1, 2, 3], [4, 5, 6]])
      json.dumps({'a': a, 'aa': [2, (2, 3, 4), a], 'bb': [2]}, cls=NumpyEncoder)
    """

    def default(self, obj):
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        # numpy scalars (np.float32, np.int64, ...) are not JSON serializable
        # either; convert them to the matching Python scalar.
        if isinstance(obj, np.generic):
            return obj.item()
        return json.JSONEncoder.default(self, obj)


def main(args):
    """Generate the animation html described by the parsed arguments.

    Reads `android_build_top`, `dump_dir`, `model_name`, `output_file_path`
    and `no_parallel` from `args`. The single-process generator is used when
    `no_parallel` is set or a single model is requested.
    """
    output_file_path = args.output_file_path or '/tmp/intermediate.html'

    manager = ModelMetaDataManager(
        args.android_build_top,
        args.dump_dir,
        tflite_model_json_dir='/tmp')

    if args.no_parallel or args.model_name:
        generation_func = manager.generate_animation_html
    else:
        generation_func = manager.multiprocessing_generate_animation_html

    if args.model_name:
        model_data = ModelData(nnapi_model_name=args.model_name, manager=manager)
        print(model_data.tensor_dict)
        generation_func(output_file_path=output_file_path, model_names=[args.model_name])
    else:
        generation_func(output_file_path=output_file_path)


if __name__ == '__main__':
    # Example usage
    # python tensor_utils.py ~/android/master/ ~/android/master/intermediate/ tts_float
    parser = argparse.ArgumentParser(description='Utilities for parsing intermediate tensors.')
    parser.add_argument('android_build_top', help='Your Android build top path.')
    parser.add_argument('dump_dir', help='The dump dir pulled from the device.')
    parser.add_argument('--model_name', help='NNAPI model name. Run all models if not specified.')
    parser.add_argument('--output_file_path', help='Animation HTML path.')
    # nargs='?' lets the flag be given bare (`--no_parallel`) while still
    # accepting the previous `--no_parallel VALUE` form.
    parser.add_argument('--no_parallel', nargs='?', const=True, default=False,
                        help='Run on a single process instead of multiple processes.')
    args = parser.parse_args()
    main(args)