1 #!/usr/bin/python3
2"""Read intermediate tensors generated by DumpAllTensors activity
3
4Tools for reading/ parsing intermediate tensors.
5"""
6
7import argparse
8import datetime
9import numpy as np
10import os
11import pandas as pd
12import tensorflow as tf
13import json
14import seaborn as sns
15import matplotlib
16matplotlib.use('Agg')
17import matplotlib.pyplot as plt
18import matplotlib.animation as animation
19import multiprocessing
20
21from matplotlib.pylab import *
22from tqdm import tqdm
23# Enable large animation size
24matplotlib.rcParams['animation.embed_limit'] = 2**128
25# Enable tensor.numpy()
26tf.compat.v1.enable_eager_execution()
27
28
29############################ Helper Functions ############################
def reshape_to_matrix(array):
  """Lay a flat array out as a near-square 2-D matrix padded with np.nan.

  The input is cast to float and flattened, then padded at the end with
  np.nan so that it fills a (width, height) matrix where
  width = ceil(sqrt(n)) and height = ceil(n / width).

  # Arguments
    array: array-like of numbers (any shape; it is flattened first)

  # Returns
    2-D float ndarray of shape (width, height). An empty input yields an
    empty (0, 0) matrix instead of raising ZeroDivisionError.
  """
  # Imported locally: the module never imports `math` explicitly and would
  # otherwise depend on the `matplotlib.pylab` star import to provide it.
  import math

  flat = np.asarray(array, dtype=float).ravel()
  size = flat.size
  if size == 0:
    return np.empty((0, 0), dtype=float)
  width = math.ceil(math.sqrt(size))
  height = math.ceil(size / width)
  padded = np.pad(flat,
                  pad_width=(0, width * height - size),
                  mode='constant',
                  constant_values=np.nan)
  return padded.reshape(width, height)
41
def save_ani_to_video(ani, save_video_path, video_fps=5):
  """Render an animation to a video file with matplotlib's ffmpeg writer.

  # Arguments
    ani: object exposing a matplotlib-animation style `save` method
    save_video_path: destination path of the video
    video_fps: frames per second handed to the writer
  """
  # Look the writer class up in matplotlib's registry and instantiate it
  ffmpeg_writer = animation.writers['ffmpeg'](fps=video_fps)
  ani.save(save_video_path, writer=ffmpeg_writer, dpi=250)
47
def save_ani_to_html(ani, save_html_path):
  """Write the jshtml rendering of an animation to a file.

  # Arguments
    ani: object exposing `to_jshtml()` which returns the HTML as a string
    save_html_path: destination path; the file is opened in 'w' mode
  """
  with open(save_html_path, 'w') as html_file:
    # The file is opened before rendering, exactly as the statement order
    # here shows, so the target is truncated even if rendering fails.
    rendered = ani.to_jshtml()
    html_file.write(rendered)
51
52############################ ModelMetaDataManager ############################
class ModelMetaDataManager(object):
  """Maps model name in nnapi to its graph architecture with lazy initialization.

  # Arguments
    android_build_top: the root directory of android source tree
    dump_dir: directory containing intermediate tensors pulled from device;
      every entry of this directory is treated as one nnapi model name
    tflite_model_json_dir: directory containing intermediate json output of
      model visualization tool (third_party/tensorflow/lite/tools:visualize).
      The json output path from the tool is always /tmp.
  """
  ############################ ModelMetaData ############################
  class ModelMetaData(object):
    """Store graph information of a model.

    # Arguments
      tflite_model_json_path: path of the json file describing the model.
        Only the first subgraph's operators and the operator code table are
        read from it.
    """

    def __init__(self, tflite_model_json_path='/tmp'):
      with open(tflite_model_json_path, 'rb') as f:
        model_json = json.load(f)
      self.operators = model_json['subgraphs'][0]['operators']
      self.operator_codes = [item['builtin_code']
                             for item in model_json['operator_codes']]
      self.output_meta_data = []
      self.load_output_meta_data()

    def load_output_meta_data(self):
      """Append one metadata dict per operator to self.output_meta_data."""
      for operator in self.operators:
        # Each operator can only have one output
        assert len(operator['outputs']) == 1, \
            'expected exactly one output per operator'
        # `or {}` also covers a 'builtin_options' key that holds null
        builtin_options = operator.get('builtin_options') or {}
        self.output_meta_data.append({
            'output_tensor_index': operator['outputs'][0],
            'fused_activation_function':
                builtin_options.get('fused_activation_function', ''),
            'operator_code': self.operator_codes[operator['opcode_index']],
        })

  def __init__(self, android_build_top, dump_dir, tflite_model_json_dir='/tmp'):
    # key: nnapi model name, value: ModelMetaData
    self.models = dict()
    # The directory attributes always end with "/" because other code
    # concatenates file names directly onto them.
    self.ANDROID_BUILD_TOP = android_build_top + "/"
    self.TFLITE_MODEL_JSON_DIR = tflite_model_json_dir + "/"
    self.DUMP_DIR = dump_dir + "/"
    self.nnapi_to_tflite_name = dict()
    self.tflite_to_nnapi_name = dict()
    self.__load_mobilenet_topk_aosp()
    self.model_names = sorted(os.listdir(dump_dir))

  def __load_mobilenet_topk_aosp(self):
    """Load information about tflite and nnapi model names."""
    json_path = os.path.join(
        self.ANDROID_BUILD_TOP,
        'test/mlts/models/assets/models_list/mobilenet_topk_aosp.json')
    with open(json_path, 'rb') as f:
      topk_aosp = json.load(f)
    for model in topk_aosp['models']:
      self.nnapi_to_tflite_name[model['name']] = model['modelFile']
      self.tflite_to_nnapi_name[model['modelFile']] = model['name']

  def __get_model_json_path(self, tflite_model_name):
    """Return tflite model json path."""
    return os.path.join(self.TFLITE_MODEL_JSON_DIR,
                        '{}.json'.format(tflite_model_name))

  def __load_model(self, tflite_model_name):
    """Initialize a ModelMetaData for this model."""
    model = self.ModelMetaData(self.__get_model_json_path(tflite_model_name))
    nnapi_model_name = self.model_name_tflite_to_nnapi(tflite_model_name)
    self.models[nnapi_model_name] = model

  def model_name_nnapi_to_tflite(self, nnapi_model_name):
    """Translate an nnapi model name; unknown names are returned unchanged."""
    return self.nnapi_to_tflite_name.get(nnapi_model_name, nnapi_model_name)

  def model_name_tflite_to_nnapi(self, tflite_model_name):
    """Translate a tflite model name; unknown names are returned unchanged."""
    return self.tflite_to_nnapi_name.get(tflite_model_name, tflite_model_name)

  def get_model_meta_data(self, nnapi_model_name):
    """Retrieve the ModelMetaData with lazy initialization."""
    tflite_model_name = self.model_name_nnapi_to_tflite(nnapi_model_name)
    if nnapi_model_name not in self.models:
      self.__load_model(tflite_model_name)
    return self.models[nnapi_model_name]

  def generate_animation_html(self, output_file_path, model_names=None, heatmap=True):
    """Generate a html file containing the hist and heatmap animation of all models

    # Arguments
      output_file_path: path of the html file to write
      model_names: models to process; defaults to every entry of dump_dir
      heatmap: when False only the histogram animations are generated
    """
    model_names = self.model_names if model_names is None else model_names
    html_parts = []
    for model_name in tqdm(model_names):
      print(datetime.datetime.now(), 'Processing', model_name)
      html_parts.append('<h3>{}</h3>'.format(model_name))
      model_data = ModelData(nnapi_model_name=model_name, manager=self)
      html_parts.append(model_data.gen_error_hist_animation().to_jshtml())
      if heatmap:
        html_parts.append(model_data.gen_heatmap_animation().to_jshtml())
    with open(output_file_path, 'w') as f:
      f.write(''.join(html_parts))

  def generate_hist_animation_html(self, model_name):
    """Generate a html hist animation for a model, used for multiprocessing

    The result is stored in self.return_dict, which is created by
    multiprocessing_generate_animation_html.
    """
    html_data = '<h3>{}</h3>'.format(model_name)
    model_data = ModelData(nnapi_model_name=model_name, manager=self)
    ani = model_data.gen_error_hist_animation()
    html_data += ani.to_jshtml()
    print(datetime.datetime.now(), "Done histogram for", model_name)
    self.return_dict[model_name + "-hist"] = html_data

  def generate_heatmap_animation_html(self, model_name):
    """Generate a html heatmap animation for a model, used for multiprocessing

    The result is stored in self.return_dict, which is created by
    multiprocessing_generate_animation_html.
    """
    model_data = ModelData(nnapi_model_name=model_name, manager=self)
    ani = model_data.gen_heatmap_animation()
    html_data = ani.to_jshtml()
    print(datetime.datetime.now(), "Done heatmap for", model_name)
    self.return_dict[model_name + "-heatmap"] = html_data

  def multiprocessing_generate_animation_html(self, output_file_path,
                                       model_names=None, heatmap=True):
    """
    Generate a html file containing the hist and heatmap animation of all models
    with multiple process.

    # Arguments
      output_file_path: path of the html file to write
      model_names: models to process; defaults to every entry of dump_dir
      heatmap: when False the heatmap animations are neither rendered nor
        written, matching generate_animation_html
    """
    model_names = self.model_names if model_names is None else model_names
    manager = multiprocessing.Manager()
    self.return_dict = manager.dict()
    # One process per (model, animation kind) pair
    target_funcs = [self.generate_hist_animation_html]
    if heatmap:
      target_funcs.append(self.generate_heatmap_animation_html)
    jobs = []
    for model_name in model_names:
      for target_func in target_funcs:
        p = multiprocessing.Process(target=target_func, args=(model_name,))
        jobs.append(p)
        p.start()
    # wait for completion
    for job in jobs:
      job.join()

    with open(output_file_path, 'w') as f:
      for model_name in model_names:
        f.write(self.return_dict[model_name + "-hist"])
        if heatmap:
          f.write(self.return_dict[model_name + "-heatmap"])
191
192
193############################ TensorDict ############################
class TensorDict(dict):
  """A class to store cpu and nnapi tensors.

  The dict has two keys, 'cpu' and 'nnapi'; each maps a tensor file name to
  the numpy tensor read from `<model_dir>/<key>/`.

  # Arguments
    model_dir: directory containing intermediate tensors pulled from device
  """
  def __init__(self, model_dir):
    super().__init__()
    for useNNAPIDir in ['cpu', 'nnapi']:
      # os.path.join works whether or not model_dir ends with a separator
      self[useNNAPIDir] = self.read_tensors_from_dir(
          os.path.join(model_dir, useNNAPIDir))
    self.tensor_sanity_check()
    self.max_absolute_diff, self.min_absolute_diff = 0.0, 0.0
    self.max_relative_diff, self.min_relative_diff = 0.0, 0.0
    self.absolute_range = 0.0
    self.layers = sorted(self['cpu'].keys())
    self.calc_range()

  def bytes_to_numpy_tensor(self, file_path):
    """Load bytes outputted from DumpIntermediateTensor into numpy tensor.

    The element type is chosen from the path: int8 when it contains 'quant'
    or '8bit' (anywhere in the path, directories included), float32 otherwise.
    """
    if 'quant' in file_path or '8bit' in file_path:
      tensor_type = tf.int8
    else:
      tensor_type = tf.float32
    with open(file_path, mode='rb') as f:
      tensor_bytes = f.read()
    # tf.io.decode_raw is the non-deprecated name of tf.decode_raw
    tensor = tf.io.decode_raw(input_bytes=tensor_bytes,
                              out_type=tensor_type).numpy()
    # isfinite catches both inf and nan, which is what the message promises
    if not np.all(np.isfinite(tensor)):
      print('WARNING: tensor contains inf or nan')
    return tensor

  def read_tensors_from_dir(self, dir_path):
    """Return {file name: numpy tensor} for every entry of dir_path."""
    return {
        tensor_file: self.bytes_to_numpy_tensor(
            os.path.join(dir_path, tensor_file))
        for tensor_file in os.listdir(dir_path)
    }

  def tensor_sanity_check(self):
    """Make sure the cpu tensors and nnapi tensors have the same outputs."""
    assert set(self['cpu'].keys()) == set(self['nnapi'].keys()), \
        'cpu and nnapi dumps contain different tensor files'
    print('Tensor sanity check passed')

  def calc_range(self):
    """Update the absolute min/max diff over all layers and absolute_range."""
    for layer in self.layers:
      diff = self.calc_diff(layer, relative_error=False)
      if diff.size == 0:
        # np.max/np.min raise on empty arrays; an empty tensor adds nothing
        continue
      # update absolute max, min
      self.max_absolute_diff = max(self.max_absolute_diff, np.max(diff))
      self.min_absolute_diff = min(self.min_absolute_diff, np.min(diff))
    # Assigned outside the loop so it exists even when there are no layers
    self.absolute_range = max(abs(self.min_absolute_diff),
                              abs(self.max_absolute_diff))

  @staticmethod
  def _to_wide_dtype(tensor):
    """Promote integer tensors to int64 so arithmetic cannot wrap around."""
    if np.issubdtype(tensor.dtype, np.integer):
      return tensor.astype(np.int64)
    return tensor

  def calc_diff(self, layer, relative_error=True):
    """Return cpu - nnapi for a layer, optionally as a relative error.

    # Arguments
      layer: tensor file name present under both 'cpu' and 'nnapi'
      relative_error: when True the diff is divided element-wise by
        max(|cpu|, |nnapi|); elements where that maximum is not > 0 are 0

    # Returns
      ndarray with the shape of the input tensors. Integer tensors are
      widened to int64 first, so int8 values such as 127 - (-128) yield 255
      instead of wrapping around.
    """
    cpu_tensor = self._to_wide_dtype(self['cpu'][layer])
    nnapi_tensor = self._to_wide_dtype(self['nnapi'][layer])
    assert cpu_tensor.shape == nnapi_tensor.shape, \
        'cpu and nnapi tensors differ in shape'
    diff = cpu_tensor - nnapi_tensor
    if not relative_error:
      return diff
    diff = diff.astype(float)
    # Divide by the larger magnitude: the result lies in [-1, 1] when both
    # values share a sign and can reach +/-2 when their signs differ.
    max_cpu_nnapi_tensor = np.maximum(np.abs(cpu_tensor),
                                      np.abs(nnapi_tensor)).astype(float)
    return np.divide(diff, max_cpu_nnapi_tensor, out=np.zeros_like(diff),
                     where=max_cpu_nnapi_tensor > 0)

  def gen_tensor_diff_stats(self, relative_error=True, return_df=True, plot_diff=False):
    """Compute min/max/mean/median of the diff of every layer.

    # Arguments
      relative_error: forwarded to calc_diff
      return_df: when True a pandas DataFrame with one row per layer is
        returned; otherwise the method returns None
      plot_diff: when True a histogram of each diff is plotted
    """
    stats = []
    for layer in self.layers:
      diff = self.calc_diff(layer, relative_error)
      if plot_diff:
        self.plot_tensor_diff(diff)
      if return_df:
        stats.append({
          'layer': layer,
          'min': np.min(diff),
          'max': np.max(diff),
          'mean': np.mean(diff),
          'median': np.median(diff)
        })
    if return_df:
      return pd.DataFrame(stats)

  @staticmethod
  def plot_tensor_diff(diff):
    """Plot a log-scaled 50-bin histogram of diff on a new figure.

    Declared static so that both `self.plot_tensor_diff(diff)` and
    `TensorDict.plot_tensor_diff(diff)` work; without it the instance call
    in gen_tensor_diff_stats raised TypeError.
    """
    plt.figure()
    plt.hist(diff, bins=50, log=True)
    plt.plot()
281
282
283############################ Model Data ############################
class ModelData(object):
  """A class to store all relevant information of a model.

  # Arguments
    nnapi_model_name: the name of the model
    manager: ModelMetaDataManager
    seq_limit: caps the number of animated frames at
      (number of operators in the model metadata) * seq_limit; a falsy value
      disables the cap
  """
  def __init__(self, nnapi_model_name, manager, seq_limit=10):
    self.nnapi_model_name = nnapi_model_name
    self.manager = manager
    self.model_dir = self.get_target_model_dir(manager.DUMP_DIR,
                                               nnapi_model_name)
    self.tensor_dict = TensorDict(self.model_dir)
    self.mmd = manager.get_model_meta_data(nnapi_model_name)
    self.stats = self.tensor_dict.gen_tensor_diff_stats(relative_error=True,
                                                        return_df=True)
    self.layers = sorted(self.tensor_dict['cpu'].keys())
    self.cmap = sns.diverging_palette(220, 20, sep=20, as_cmap=True)
    self.seq_limit = seq_limit

  def get_target_model_dir(self, dump_dir, target_model_name):
    """Return the model directory path, always ending with a separator."""
    # The trailing '' makes os.path.join emit the final separator that
    # downstream string concatenation relies on.
    return os.path.join(dump_dir, target_model_name, '')

  def __sns_distplot(self, layer, bins, ax, value_range, relative_error):
    """Draw the diff histogram of a layer on ax with seaborn."""
    sns.distplot(self.tensor_dict.calc_diff(layer, relative_error=relative_error), bins=bins,
             hist_kws={"range": value_range, "log": True}, ax=ax, kde=False)

  def __plt_hist(self, layer, bins, ax, value_range, relative_error):
    """Draw the diff histogram of a layer on ax with matplotlib."""
    ax.hist(self.tensor_dict.calc_diff(layer, relative_error=relative_error), bins=bins,
             range=value_range, log=True)

  def __get_layer_num(self):
    """Return the number of frames to animate, honoring seq_limit."""
    if self.seq_limit:
      return min(len(self.layers), len(self.mmd.output_meta_data) * self.seq_limit)
    return len(self.layers)

  def update_hist_data(self, i, fig, ax1, ax2, bins=50, plot_library='sns'):
    """Redraw the relative (ax1) and absolute (ax2) error histograms of frame i.

    # Arguments
      i: index into self.layers
      fig: figure whose suptitle is updated
      ax1, ax2: axes for the relative and absolute error histograms
      bins: number of histogram bins
      plot_library: 'matplotlib' uses ax.hist, anything else uses seaborn
    """
    # Use % because there may be multiple testing samples
    operation = self.mmd.output_meta_data[i % len(self.mmd.output_meta_data)]['operator_code']
    layer = self.layers[i]
    fig.suptitle('{} | {}\n{}'
                 .format(self.nnapi_model_name, layer, operation),
                 fontsize='x-large')
    for ax in (ax1, ax2):
      ax.clear()
    ax1.set_title('Relative Error')
    ax2.set_title('Absolute Error')
    absolute_range = self.tensor_dict.absolute_range

    # Determine underlying plotting library
    hist_func = self.__plt_hist if plot_library == 'matplotlib' else self.__sns_distplot
    hist_func(layer=layer, bins=bins, ax=ax1,
              value_range=(-1, 1), relative_error=True)
    hist_func(layer=layer, bins=bins, ax=ax2,
              value_range=(-absolute_range, absolute_range), relative_error=False)

  def gen_error_hist_animation(self, save_video_path=None, video_fps=10):
    """Build the error histogram animation over the layers.

    # Arguments
      save_video_path: when given, the animation is also saved as a video
      video_fps: frames per second of the saved video

    # Returns
      the matplotlib FuncAnimation
    """
    fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12,9))
    ani = animation.FuncAnimation(fig, self.update_hist_data, self.__get_layer_num(),
                                  fargs=(fig, ax1, ax2),
                                  interval=200, repeat=False)
    if save_video_path:
      save_ani_to_video(ani, save_video_path, video_fps)
    # close before return to avoid dangling plot; name the figure explicitly
    # instead of relying on it still being the current one
    plt.close(fig)
    return ani

  def __sns_heatmap(self, data, ax, cbar_ax, **kwargs):
    """Draw a zero-centered heatmap with a horizontal colorbar."""
    return sns.heatmap(data, cmap=self.cmap, cbar=True, ax=ax, cbar_ax=cbar_ax,
                     cbar_kws={"orientation": "horizontal"}, center=0, **kwargs)

  def update_heatmap_data(self, i, fig, axs):
    """Redraw the diff / cpu / nnapi heatmaps of frame i.

    # Arguments
      i: index into self.layers
      fig: figure whose suptitle is updated
      axs: 2x3 nested list of axes; row 0 holds the heatmaps and row 1 the
        matching colorbars
    """
    # Use % because there may be multiple testing samples
    operation = self.mmd.output_meta_data[i % len(self.mmd.output_meta_data)]['operator_code']
    layer = self.layers[i]
    fig.suptitle('{} | {}\n{}\n'
                 .format(self.nnapi_model_name, layer, operation),
                 fontsize='x-large')
    # Clear all the axs and redraw
    # It's important to clear the colorbars as well to avoid duplicate colorbars
    for ax_tuple in axs:
      for ax in ax_tuple:
        ax.clear()
    axs[0][0].set_title('Diff')
    axs[0][1].set_title('CPU Tensor')
    axs[0][2].set_title('NNAPI Tensor')

    reshaped_diff = reshape_to_matrix(self.tensor_dict.calc_diff(layer, relative_error=False))
    reshaped_cpu = reshape_to_matrix(self.tensor_dict['cpu'][layer])
    reshaped_nnapi = reshape_to_matrix(self.tensor_dict['nnapi'][layer])
    absolute_range = self.tensor_dict.absolute_range
    # The diff heatmap shares one color scale across all frames
    self.__sns_heatmap(data=reshaped_diff, ax=axs[0][0], cbar_ax=axs[1][0],
                       vmin=-absolute_range, vmax=absolute_range)
    self.__sns_heatmap(data=reshaped_cpu, ax=axs[0][1], cbar_ax=axs[1][1])
    self.__sns_heatmap(data=reshaped_nnapi, ax=axs[0][2], cbar_ax=axs[1][2])

  def gen_heatmap_animation(self, save_video_path=None, video_fps=10, figsize=(13,6)):
    """Build the heatmap animation over the layers.

    # Arguments
      save_video_path: when given, the animation is also saved as a video
      video_fps: frames per second of the saved video
      figsize: size of the figure

    # Returns
      the matplotlib FuncAnimation
    """
    fig = plt.figure(constrained_layout=True, figsize=figsize)
    widths = [1, 1, 1]
    heights = [7, 1]
    spec = fig.add_gridspec(ncols=3, nrows=2, width_ratios=widths,
                            height_ratios=heights)
    # Row 0: heatmaps, row 1: colorbars
    axs = [[fig.add_subplot(spec[row, col]) for col in range(3)]
           for row in range(2)]

    ani = animation.FuncAnimation(fig, self.update_heatmap_data, self.__get_layer_num(),
                                  fargs=(fig, axs),
                                  interval=200, repeat=False)
    if save_video_path:
      save_ani_to_video(ani, save_video_path, video_fps)
    # close before return to avoid dangling plot
    plt.close(fig)
    return ani

  def plot_error_heatmap(self, target_layer, vmin=None, vmax=None):
    """Plot the diff heatmap for a given layer.

    # Arguments
      target_layer: tensor file name of the layer to plot
      vmin, vmax: optional color scale limits forwarded to sns.heatmap
    """
    # Reuse calc_diff so this plot agrees with the animations
    target_diff = self.tensor_dict.calc_diff(target_layer, relative_error=False)
    reshaped_target_diff = reshape_to_matrix(target_diff)
    fig, ax = plt.subplots(figsize=(9, 9))
    ax.set_title('Heat Map of Error between CPU and NNAPI')
    sns.heatmap(reshaped_target_diff,
                cmap=self.cmap,
                mask=np.isnan(reshaped_target_diff),
                center=0,
                vmin=vmin,
                vmax=vmax,
                ax=ax)
    plt.show()
415
416
417############################ ModelDataComparison ############################
class ModelDataComparison:
  """A class to store and compare multiple ModelData.

  One ModelMetaDataManager and one ModelData are created per dump directory;
  they are available as `manager_list` and `model_data_list`.

  # Arguments
    dump_dir_list: list of dump directories to be compared
    android_build_top: the root directory of android source tree
    tflite_model_json_dir: directory containing the model json files
    model_name: nnapi name of the model to compare across the dump dirs
  """
  def __init__(self, dump_dir_list, android_build_top, tflite_model_json_dir, model_name):
    self.dump_dir_list = dump_dir_list
    self.android_build_top = android_build_top
    self.tflite_model_json_dir = tflite_model_json_dir
    self.set_model_name(model_name)

  def set_model_name(self, model_name):
    """Set model to be compared and load/ reload all model data."""
    self.model_name = model_name
    self.__load_data()

  def __load_data(self):
    """Load all model data, one manager and ModelData per dump dir."""
    self.manager_list = []
    self.model_data_list = []
    for dump_dir in self.dump_dir_list:
      manager = ModelMetaDataManager(self.android_build_top,
                                     dump_dir,
                                     tflite_model_json_dir=self.tflite_model_json_dir)
      model_data = ModelData(nnapi_model_name=self.model_name, manager=manager)
      self.manager_list.append(manager)
      self.model_data_list.append(model_data)
    self.sanity_check()

  def sanity_check(self):
    """Validate the loaded data and cache the shared layers and metadata.

    Checks
    1) that there is at least one model to be compared
    2) that every other model has the same intermediate layers as the first
    """
    assert len(self.model_data_list) >= 1, 'no model data to compare'
    sample_model_data = self.model_data_list[0]
    sample_layers = set(sample_model_data.tensor_dict['cpu'].keys())
    for other_model_data in self.model_data_list[1:]:
      assert sample_layers == set(other_model_data.tensor_dict['nnapi'].keys()), \
          'model data do not share the same intermediate layers'
    print('Sanity Check Passed')
    self.layers = sample_model_data.layers
    self.mmd = sample_model_data.mmd

  def _dump_dir_labels(self):
    """Return the last path component of every dump dir as its plot label."""
    # normpath drops a trailing separator, so 'a/b/' and 'a/b' both give 'b'
    return [os.path.basename(os.path.normpath(dump_dir))
            for dump_dir in self.dump_dir_list]

  def update_hist_comparison_data(self, i, fig, axs, bins=50):
    """Redraw the combined histogram and the per-model heatmaps of frame i.

    # Arguments
      i: index into self.layers
      fig: figure whose suptitle is updated
      axs: three rows of axes: [combined histogram], heatmaps, colorbars
      bins: number of histogram bins
    """
    sample_model_data = self.model_data_list[0]
    # Use % because there may be multiple testing samples
    operation = self.mmd.output_meta_data[i % len(self.mmd.output_meta_data)]['operator_code']
    layer = self.layers[i]
    fig.suptitle('{} | {}\n{}'
                 .format(sample_model_data.nnapi_model_name, layer, operation),
                 fontsize='x-large')
    for row in axs:
      for ax in row:
        ax.clear()

    hist_ax = axs[0][0]
    hist_ax.set_title('Diff Histogram')
    labels = self._dump_dir_labels()
    cmap = sns.diverging_palette(220, 20, sep=20, as_cmap=True)
    # One heatmap + colorbar per model; all histograms share hist_ax
    for heatmap_ax, cbar_ax, model_data, label in zip(
        axs[1], axs[2], self.model_data_list, labels):
      heatmap_ax.set_title(label)
      diff = model_data.tensor_dict.calc_diff(layer, relative_error=False)
      sns.heatmap(reshape_to_matrix(diff), cmap=cmap, cbar=True, ax=heatmap_ax,
                  cbar_ax=cbar_ax, cbar_kws={"orientation": "horizontal"}, center=0)
      sns.distplot(diff, bins=bins,
                   hist_kws={"log": True}, ax=hist_ax, kde=False)
    hist_ax.legend(labels)

  def gen_error_hist_comparison_animation(self, save_video_path=None, video_fps=10):
    """Build the comparison animation over all layers.

    # Arguments
      save_video_path: when given, the animation is also saved as a video
      video_fps: frames per second of the saved video

    # Returns
      the matplotlib FuncAnimation
    """
    layers = self.layers
    N = len(self.model_data_list)
    widths = [1] * N
    heights = [N * 0.7, 1, 0.2]
    fig = plt.figure(figsize=(5 * N, 4 * N))
    gs = fig.add_gridspec(3, N, width_ratios=widths, height_ratios=heights)
    # Row 0: one histogram spanning all columns; row 1: heatmaps;
    # row 2: colorbars
    axs = [
        [fig.add_subplot(gs[0, :])],
        [fig.add_subplot(gs[1, col]) for col in range(N)],
        [fig.add_subplot(gs[2, col]) for col in range(N)],
    ]
    ani = animation.FuncAnimation(fig, self.update_hist_comparison_data, len(layers),
                                  fargs=(fig, axs),
                                  interval=200, repeat=False)
    if save_video_path:
      save_ani_to_video(ani, save_video_path, video_fps)
    # close before return to avoid dangling plot
    plt.close(fig)
    return ani
511
512
513############################ NumpyEncoder ############################
class NumpyEncoder(json.JSONEncoder):
  """Enable numpy array and numpy scalar serialization in a dictionary.

  Arrays are encoded as (nested) lists and numpy scalars (e.g. np.float32,
  np.int64, np.bool_) as the matching Python scalar. Any other unsupported
  object still raises TypeError through the base class.

  Usage:
    a = np.array([[1, 2, 3], [4, 5, 6]])
    json.dumps({'a': a, 'aa': [2, (2, 3, 4), a], 'bb': [2]}, cls=NumpyEncoder)
  """
  def default(self, obj):
    if isinstance(obj, np.ndarray):
      return obj.tolist()
    if isinstance(obj, np.generic):
      # numpy scalar -> native Python scalar
      return obj.item()
    return super().default(obj)
525
def main(args):
  """Generate the animation html for one model or for every dumped model.

  # Arguments
    args: parsed command line arguments providing android_build_top,
      dump_dir, model_name, output_file_path and no_parallel
  """
  output_file_path = args.output_file_path if args.output_file_path else '/tmp/intermediate.html'

  manager = ModelMetaDataManager(
    args.android_build_top,
    args.dump_dir,
    tflite_model_json_dir='/tmp')

  # A single model is always processed in-process
  if args.no_parallel or args.model_name:
    generation_func = manager.generate_animation_html
  else:
    generation_func = manager.multiprocessing_generate_animation_html

  if args.model_name:
    # The model name lives on args; a bare `model_name` is undefined here
    model_data = ModelData(nnapi_model_name=args.model_name, manager=manager)
    print(model_data.tensor_dict)
    generation_func(output_file_path=output_file_path, model_names=[args.model_name])
  else:
    generation_func(output_file_path=output_file_path)
545
546
if __name__ == '__main__':
  # Example usage
  # python tensor_utils.py ~/android/master/ ~/android/master/intermediate/ --model_name tts_float
  parser = argparse.ArgumentParser(description='Utilities for parsing intermediate tensors.')
  parser.add_argument('android_build_top', help='Your Android build top path.')
  parser.add_argument('dump_dir', help='The dump dir pulled from the device.')
  parser.add_argument('--model_name', help='NNAPI model name. Run all models if not specified.')
  parser.add_argument('--output_file_path', help='Animation HTML path.')
  # nargs='?' lets --no_parallel work as a bare flag (value True) while
  # still accepting the old "--no_parallel <value>" form.
  parser.add_argument('--no_parallel', nargs='?', const=True,
                      help='Run on a single process instead of multiple processes.')
  args = parser.parse_args()
  main(args)