""" NNAPI systrace parser - aggregation of timing from multiple threads """
2
3# TODO:
4# - phase and layer totals
5import math
6
7from parser.naming import layers, phases, subphases
8from parser.naming import (PHASE_OVERALL, PHASE_TERMINATION, PHASE_WARMUP,
9                           PHASE_BENCHMARK, PHASE_EXECUTION, PHASE_INITIALIZATION,
10                           PHASE_INPUTS_AND_OUTPUTS, PHASE_RESULTS)
11from parser.naming import LAYER_APPLICATION, LAYER_IPC, LAYER_DRIVER
# Pseudo-layer key: aggregate_times() stores each phase's total across layers
# under this key, next to the entries for the real layers.
LAYER_TOTAL = "LT"  # Total across layers
13
def aggregate_times(tracker_map, special_case_lr_pe=True):
  """ Takes the trackers for each thread and produces timing statistics for
      all layers and phases.

      Args:
        tracker_map: mapping of pid -> tracker. Each tracker must provide
          get_execution_count(app_phase), get_stat(tag, app_phase,
          special_case_lr_pe), get_ld_pe_begins(app_phase) and
          get_ld_pe_ends(app_phase).
        special_case_lr_pe: passed through unchanged to get_stat().

      Returns (times, self_times, has_warmup and has_benchmark, execution_counts),
      where:
        - times and self_times are nested dictionaries of the form
          phase -> layer -> time with the following notes:
          - phase is flattened over all phases, except PHASE_WARMUP and
            PHASE_BENCHMARK, where the structure is phase -> phase -> layer -> time
          - PHASE_WARMUP and PHASE_BENCHMARK only nest execution and its
            subphases
          - PHASE_WARMUP and PHASE_BENCHMARK are only present if the trace
            contains executions for both of them
          - the first level phase contains total over PHASE_WARMUP and
            PHASE_BENCHMARK if present
          - time may be math.nan if the data is not present in the trace
          - in addition to the layers from parser.naming, LAYER_TOTAL holds
            the total across layers for that phase, computed as the largest
            per-layer time (NaNs are counted as zero)
          - a layer's self time is its time minus the time of the closest
            later entry in `layers` that has a non-zero, non-NaN time
        - execution_counts contains a dictionary of the form
          {PHASE_OVERALL, PHASE_WARMUP, PHASE_BENCHMARK} -> no of executions

      Raises:
        AssertionError: if the driver execution begin and end timestamps
          gathered from all trackers differ in number.
  """
  all_application_phases = [PHASE_OVERALL, PHASE_WARMUP, PHASE_BENCHMARK]
  # Calculate execution counts: the largest count reported by any tracker
  execution_counts = dict()
  for app_phase in all_application_phases:
    execution_count = 0
    for pid in tracker_map:
      execution_count = max(execution_count, tracker_map[pid].get_execution_count(app_phase))
    execution_counts[app_phase] = execution_count
  has_warmup = bool(execution_counts[PHASE_WARMUP])
  has_benchmark = bool(execution_counts[PHASE_BENCHMARK])
  # The per-application-phase breakdown is only produced when both warmup and
  # benchmark executions were seen; every later step must use this same flag.
  has_app_phases = has_warmup and has_benchmark
  if not has_app_phases:
    all_application_phases = [PHASE_OVERALL]

  # Create dicts
  times = {}
  self_times = {}
  if has_app_phases:
    for app_phase in [PHASE_WARMUP, PHASE_BENCHMARK]:
      times[app_phase] = {}
      self_times[app_phase] = {}
      for phase in _phase_and_subphases(PHASE_EXECUTION):
        times[app_phase][phase] = {}
        self_times[app_phase][phase] = {}
  for phase in phases + [PHASE_OVERALL] + subphases[PHASE_EXECUTION]:
    times[phase] = {}
    self_times[phase] = {}

  # Gather total times from all threads, calculate layer and phase totals
  for layer in layers:
    for phase0 in [PHASE_OVERALL] + phases:
      for phase in _phase_and_subphases(phase0):
        t = 0.0
        tag = layer + "_" + phase
        for app_phase in all_application_phases:
          t0 = 0.0
          if layer == LAYER_DRIVER and phase == PHASE_EXECUTION:
            # Calculate driver execution times from begins and ends
            begins = []
            ends = []
            for pid in tracker_map:
              begins.extend(tracker_map[pid].get_ld_pe_begins(app_phase))
              ends.extend(tracker_map[pid].get_ld_pe_ends(app_phase))
            assert len(begins) == len(ends)
            # Pair the i-th earliest begin with the i-th earliest end
            begins.sort()
            ends.sort()
            for begin, end in zip(begins, ends):
              t0 += (end - begin)
          else:
            for pid in tracker_map:
              t0 += tracker_map[pid].get_stat(tag, app_phase, special_case_lr_pe)
          if phase0 == PHASE_EXECUTION and (app_phase != PHASE_OVERALL):
            times[app_phase][phase][layer] = zero_to_nan_if_missing(t0, phase, layer)
          t += t0
        times[phase][layer] = zero_to_nan_if_missing(t, phase, layer)
    # NaN is truthy, so this only replaces an overall time of exactly zero
    if not times[PHASE_OVERALL][layer]:
      times[PHASE_OVERALL][layer] = sum(nan_to_zero(times[phase][layer]) for phase in phases)
  for phase0 in [PHASE_OVERALL] + phases:
    for phase in _phase_and_subphases(phase0):
      times[phase][LAYER_TOTAL] = max_ignoring_nans(times[phase].values())
      if phase0 == PHASE_EXECUTION and has_app_phases:
        for app_phase in [PHASE_WARMUP, PHASE_BENCHMARK]:
          times[app_phase][phase][LAYER_TOTAL] = max_ignoring_nans(times[app_phase][phase].values())

  # Calculate self-times for each layer
  for phase0 in [PHASE_OVERALL] + phases:
    for phase in _phase_and_subphases(phase0):
      self_times[phase][LAYER_TOTAL] = times[phase][LAYER_TOTAL]
      if phase0 == PHASE_EXECUTION and has_app_phases:
        for app_phase in [PHASE_WARMUP, PHASE_BENCHMARK]:
          self_times[app_phase][phase][LAYER_TOTAL] = times[app_phase][phase][LAYER_TOTAL]
      # t carries the time of the last layer (walking `layers` backwards) that
      # had a non-zero, non-NaN time; it is subtracted from the next such layer
      t = 0.0
      for layer in reversed(layers):
        if math.isnan(times[phase][layer]):
          self_times[phase][layer] = math.nan
        elif times[phase][layer] == 0.0:
          self_times[phase][layer] = 0.0
        elif (phase == PHASE_OVERALL and
              (layer == LAYER_DRIVER or layer == LAYER_IPC) and
              times[PHASE_EXECUTION][LAYER_DRIVER] == 0.0):
          # Driver was only used for initialization phase, did not support
          # execution of the model
          if layer == LAYER_DRIVER:
            self_times[phase][layer] = times[phase][layer]
          else:
            self_times[phase][layer] = times[phase][layer] - times[phase][LAYER_DRIVER]
        else:
          self_times[phase][layer] = times[phase][layer] - t
          t = times[phase][layer]
      # The nested warmup/benchmark dicts only exist when both are present,
      # so the guard must match the one used when they were created.
      if phase0 == PHASE_EXECUTION and has_app_phases:
        for app_phase in [PHASE_WARMUP, PHASE_BENCHMARK]:
          t = 0.0
          for layer in reversed(layers):
            if math.isnan(times[app_phase][phase][layer]):
              self_times[app_phase][phase][layer] = math.nan
            elif times[app_phase][phase][layer] == 0.0:
              self_times[app_phase][phase][layer] = 0.0
            else:
              self_times[app_phase][phase][layer] = times[app_phase][phase][layer] - t
              t = times[app_phase][phase][layer]

  return (times, self_times, has_app_phases, execution_counts)
137
def zero_to_nan_if_missing(f, phase, layer):
  """ Map a zero time to NaN where a zero most likely means "no data".

      Only two places are treated as possibly missing: the Application
      layer (applications may not have any tracing) and the subphases of
      Execution in the Driver layer (other phases are discernible from the
      automatic HIDL tracepoints). Any other value, including a non-zero
      or NaN time, is returned unchanged."""
  if f != 0.0:
    return f
  if layer == LAYER_APPLICATION or (
      layer == LAYER_DRIVER and phase in subphases[PHASE_EXECUTION]):
    return math.nan
  return f
150
def nan_to_zero(f):
  """ Return 0.0 when f is NaN, otherwise return f unchanged. """
  return 0.0 if math.isnan(f) else f
155
def _phase_and_subphases(phase):
  """ List the phase followed by its subphases.

      PHASE_WARMUP and PHASE_BENCHMARK yield an empty list, PHASE_OVERALL
      yields only itself, and any other phase is followed by whatever
      `subphases` holds for it (nothing if it has no entry)."""
  if phase in (PHASE_WARMUP, PHASE_BENCHMARK):
    return []
  nested = [] if phase == PHASE_OVERALL else subphases.get(phase, [])
  return [phase] + nested
162
def max_ignoring_nans(xs):
  """ Return the largest value in xs, with every NaN counted as 0.0. """
  return max(nan_to_zero(x) for x in xs)
165