1 /* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
// Defined to demonstrate the performance difference between ION and HLOS
// memory when sharing memory with the ADSP.
18 #define USE_ION_MEMORY
19 
20 #include <limits.h>
21 #include <stdio.h>
22 
23 #include "hexagon_controller.h"
24 #include "hexagon_nn.h"
25 #include "tfm_log.h"
26 
// Integer limits used in this file. They are enumeration constants rather
// than `static const` objects because, in C, a const-qualified object is not
// an integer constant expression: arrays dimensioned with one would be
// variable-length arrays.
enum {
  MAX_NODES = 2048,       // capacity of the perf-info arrays in this file
  MAX_EVENT_COUNT = 256,  // not referenced in the visible part of this file
  OUT_RANKING_SIZE = 5,   // number of top entries ranked for the dummy run
};

// Logging switches.
static const bool DUMP_OUTPUT = false;   // log every value before ranking
static const bool DBG_EXECUTION = true;  // log progress around execution
34 
// Declared here, defined in other translation units. init_graph() is called
// for Inception v3 and init_graph_v1() for Inception v1 (see
// hexagon_controller_InitGraph); inception_dummy_int_data_299x299 is the input
// used by hexagon_controller_ExecuteInceptionDummyData.
void init_graph(uint32_t id);
void init_graph_v1(uint32_t id);
extern uint8_t inception_dummy_int_data_224x224[];
extern uint8_t inception_dummy_int_data_299x299[];
extern float inception_dummy_float_data_299x299[];

// Inception variants selectable through hexagon_controller_InitGraph.
enum InceptionVersion { INCEPTION_V1 = 0, INCEPTION_V3 = 1 };

// Variant recorded by the last accepted hexagon_controller_InitGraph call.
static enum InceptionVersion s_inception_version = INCEPTION_V3;

// File-local buffer that receives the output of the dummy-data run.
// TODO(satok): allocate dynamically
static float s_output_values[300 * 300 * 3 * 4];
51 
52 /////////////////////////////////////////////////
53 // file local functions
54 
// Placeholder for looking up the node name that belongs to `id`.
// Not implemented yet: every id yields "?".
static const char* ConvertGraphInfoIdToName(unsigned int id) {
  (void)id;  // unused until the lookup exists
  // TODO(satok): implement
  const char* const placeholder = "?";
  return placeholder;
}
59 
// Placeholder for looking up the op name that belongs to `id`.
// Not implemented yet: every id yields "?".
static const char* ConvertGraphInfoIdToOpName(unsigned int id) {
  (void)id;  // unused until the lookup exists
  // TODO(satok): implement
  const char* const placeholder = "?";
  return placeholder;
}
64 
65 /////////////////////////////////////////////////
66 // file local utilities
// Returns the index of the largest value in data[0 .. entries-1], ignoring
// every index listed in exclude_idx[0 .. exclude_size-1].
//
//  - Ties resolve to the lowest index: a later element must be strictly
//    greater to replace the current best.
//  - Entries of exclude_idx that are negative, or that do not equal any index
//    below `entries` (for example an INT_MAX marker), exclude nothing.
//  - When no candidate is left (entries == 0, or every index is excluded) the
//    result is 0; callers that need to tell this case apart must check for it
//    themselves.
static uint32_t FindMaxIdxWithExcludeList(const float* data, uint32_t entries,
                                          const int exclude_size,
                                          const int* exclude_idx) {
  bool found = false;
  float maxval = 0.0f;
  uint32_t maxidx = 0;
  for (uint32_t i = 0; i < entries; ++i) {
    bool exclude = false;
    for (int j = 0; j < exclude_size; ++j) {
      // Compare in the unsigned domain only after ruling out negative
      // entries, so a negative value can never alias a large index.
      if (exclude_idx[j] >= 0 && (uint32_t)exclude_idx[j] == i) {
        exclude = true;
        break;
      }
    }
    if (exclude) {
      continue;
    }
    // The running maximum is seeded from the first element that survives the
    // exclusion check. Seeding from data[0] unconditionally would let an
    // excluded index 0 win whenever the remaining values are smaller.
    if (!found || maxval < data[i]) {
      maxval = data[i];
      maxidx = i;
      found = true;
    }
  }
  return maxidx;
}
91 
// Returns the index of the largest value in data[0 .. entries-1] by calling
// FindMaxIdxWithExcludeList with an empty exclusion list.
static uint32_t FindMaxIdx(const float* data, uint32_t entries) {
  const int* const no_exclusions = NULL;
  return FindMaxIdxWithExcludeList(data, entries, /*exclude_size=*/0,
                                   no_exclusions);
}
95 
hexagon_controller_PrintMaxNIdx(const float * data,const uint32_t entries,const int n,int * out_ranking)96 void hexagon_controller_PrintMaxNIdx(const float* data, const uint32_t entries,
97                                      const int n, int* out_ranking) {
98   if (DUMP_OUTPUT) {
99     for (int i = 0; i < entries; ++i) {
100       TFMLOGD("%d: val = %f", i, data[i]);
101     }
102   }
103   if (n >= entries) {
104     TFMLOGD("Too many N %d >= %d", n, entries);
105   }
106   for (int i = 0; i < n; ++i) {
107     out_ranking[i] = INT_MAX;
108   }
109   for (int i = 0; i < n; ++i) {
110     out_ranking[i] = FindMaxIdxWithExcludeList(data, entries, n, out_ranking);
111   }
112   TFMLOGD("=== RANKING ===");
113   for (int i = 0; i < n; ++i) {
114     TFMLOGD("%d: id = %d, val = %f", i, out_ranking[i], data[out_ranking[i]]);
115   }
116 }
117 
// Combines the two counter fields of a perf-info record into a single value:
// counter_hi is shifted left by 32 bits and OR-ed with counter_lo.
// NOTE(review): presumably both fields are unsigned 32-bit halves of one
// 64-bit counter; confirm against the hexagon_nn_perfinfo declaration.
static inline unsigned long long int GetCounter(hexagon_nn_perfinfo s) {
  const unsigned long long int upper = s.counter_hi;
  const unsigned long long int lower = s.counter_lo;
  return (upper << 32) | lower;
}
125 
CompareCycle(const void * va,const void * vb)126 static int CompareCycle(const void* va, const void* vb) {
127   const hexagon_nn_perfinfo* a = va;
128   const hexagon_nn_perfinfo* b = vb;
129   unsigned long long int acount = GetCounter(*a);
130   unsigned long long int bcount = GetCounter(*b);
131   if (acount < bcount) {
132     return -1;
133   } else if (acount > bcount) {
134     return 1;
135   } else {
136     return 0;
137   }
138 }
139 
140 /////////////////////////////////////////////////
141 // Graph functions
142 
// Obtains a graph id from hexagon_nn_init(), sets that graph's debug level
// to 0 and returns the id.
uint32_t hexagon_controller_InstantiateGraph() {
  uint32_t graph_id = hexagon_nn_init();
  // TODO(satok): make this as argument
  const int debug_level = 0;
  hexagon_nn_set_debug_level(graph_id, debug_level);
  return graph_id;
}
149 
hexagon_controller_InitGraph(int version,uint32_t nn_id)150 void hexagon_controller_InitGraph(int version, uint32_t nn_id) {
151   if (version == 1) {
152     s_inception_version = INCEPTION_V1;
153   } else if (version == 3) {
154     s_inception_version = INCEPTION_V3;
155   } else {
156     TFMLOGE("Unsupported inception version %d", version);
157     return;
158   }
159   if (s_inception_version == INCEPTION_V3) {
160     init_graph(nn_id);
161   } else if (s_inception_version == INCEPTION_V1) {
162     init_graph_v1(nn_id);
163   }
164   TFMLOGD("Init graph (inception version = %d) done.", version);
165 }
166 
// Calls hexagon_nn_prepare() on graph `nn_id`.
// Returns true when it reports 0. Otherwise logs the error code, calls
// DumpNNId() for the graph and returns false.
bool hexagon_controller_ConstructGraph(uint32_t nn_id) {
  const int err = hexagon_nn_prepare(nn_id);
  if (err == 0) {
    TFMLOGD("Prepare success!\n");
    return true;
  }
  TFMLOGE("Prepare failed! returned 0x%x\n", err);
  DumpNNId(nn_id);
  return false;
}
178 
// Convenience wrapper: instantiates a graph, initializes it for the given
// Inception `version`, constructs it, and returns the graph id.
uint32_t hexagon_controller_SetupGraph(int version) {
  const uint32_t graph_id = hexagon_controller_InstantiateGraph();
  hexagon_controller_InitGraph(version, graph_id);
  // The construction result is deliberately not propagated: the id is
  // returned whether or not construction succeeded.
  (void)hexagon_controller_ConstructGraph(graph_id);
  return graph_id;
}
185 
// Runs graph `nn_id` through hexagon_nn_execute_new() with `input_count`
// input tensors and `output_count` output tensors.
// Returns true when the call reports 0, false otherwise. Progress and failure
// details are logged only while DBG_EXECUTION is set.
bool hexagon_controller_ExecuteGraphWithMultipleInOut(
    const uint32_t nn_id, const int input_count, hexagon_nn_tensordef* inputs,
    const int output_count, hexagon_nn_tensordef* outputs) {
  if (DBG_EXECUTION) {
    TFMLOGD("Preparing to execute... in = %d, out = %d", input_count,
            output_count);
    LogDHexagon("Execute graph!");
  }

  const int err =
      hexagon_nn_execute_new(nn_id, inputs, input_count, outputs, output_count);
  const bool ok = (err == 0);

  if (DBG_EXECUTION) {
    if (ok) {
      LogDHexagon("Execution succeeded!");
    } else {
      LogDHexagon("Execution failed!");
      TFMLOGE("execute got err: %d\n", err);
      DumpNNId(nn_id);
    }
  }
  return ok;
}
211 
hexagon_controller_ExecuteGraph(const uint32_t nn_id,const uint32_t batches,const uint32_t height,const uint32_t width,const uint32_t depth,uint8_t * int_data,const uint32_t int_data_size,uint32_t * out_batches,uint32_t * out_height,uint32_t * out_width,uint32_t * out_depth,uint8_t * out_vals,const uint32_t output_val_byte_size,uint32_t * out_data_byte_size)212 bool hexagon_controller_ExecuteGraph(
213     const uint32_t nn_id, const uint32_t batches, const uint32_t height,
214     const uint32_t width, const uint32_t depth, uint8_t* int_data,
215     const uint32_t int_data_size, uint32_t* out_batches, uint32_t* out_height,
216     uint32_t* out_width, uint32_t* out_depth, uint8_t* out_vals,
217     const uint32_t output_val_byte_size, uint32_t* out_data_byte_size) {
218   if (DBG_EXECUTION) {
219     TFMLOGD("Preparing to execute...");
220     TFMLOGD("Input: %d, %d, %d, %d, %d, %d", batches, height, width, depth,
221             int_data[0], int_data_size);
222     TFMLOGD("Output: %d, %p", output_val_byte_size, out_vals);
223     LogDHexagon("Execute graph!");
224   }
225 
226   hexagon_nn_tensordef input;
227   hexagon_nn_tensordef output;
228 
229   input.batches = batches;
230   input.height = height;
231   input.width = width;
232   input.depth = depth;
233   input.data = int_data;
234   input.dataLen = int_data_size;
235 
236   output.data = out_vals;
237   output.dataLen = output_val_byte_size;
238 
239   if (!hexagon_controller_ExecuteGraphWithMultipleInOut(nn_id, 1, &input, 1,
240                                                         &output)) {
241     return false;
242   } else {
243     *out_batches = output.batches;
244     *out_height = output.height;
245     *out_width = output.width;
246     *out_depth = output.depth;
247     *out_data_byte_size = output.dataLen;
248 
249     if (DBG_EXECUTION) {
250       LogDHexagon("Execution succeeded!");
251       TFMLOGD("%d x %d x %d x %d, byte size = %d\n", *out_batches, *out_height,
252               *out_width, *out_depth, *out_data_byte_size);
253     }
254     return true;
255   }
256 }
257 
hexagon_controller_ExecuteInceptionDummyData(uint32_t nn_id)258 bool hexagon_controller_ExecuteInceptionDummyData(uint32_t nn_id) {
259   uint32_t out_batches, out_height, out_width, out_depth;
260   uint32_t out_data_size;
261   // s_output_values = 300 * 300 * 3 * 4 * 4
262   const bool success = hexagon_controller_ExecuteGraph(
263       nn_id, INCEPTION_PARAM_BATCHES, INCEPTION_PARAM_HEIGHT_V3,
264       INCEPTION_PARAM_WIDTH_V3, INCEPTION_PARAM_DEPTH,
265       (uint8_t*)inception_dummy_int_data_299x299,
266       INCEPTION_PARAM_HEIGHT_V3 * INCEPTION_PARAM_WIDTH_V3 *
267           INCEPTION_PARAM_DEPTH,
268       &out_batches, &out_height, &out_width, &out_depth,
269       (uint8_t*)s_output_values, sizeof(s_output_values), &out_data_size);
270   if (success) {
271     int out_ranking[OUT_RANKING_SIZE];
272     hexagon_controller_PrintMaxNIdx(
273         s_output_values, out_batches * out_height * out_width * out_depth,
274         OUT_RANKING_SIZE, out_ranking);
275     TFMLOGD("%d x %d x %d x %d, size = %d\n", out_batches, out_height,
276             out_width, out_depth, out_data_size);
277     TFMLOGD("max idx: %d\n",
278             FindMaxIdx(s_output_values,
279                        out_batches * out_height * out_width * out_depth));
280     if (out_ranking[0] == 169 && out_ranking[1] == 7) {
281       return true;
282     } else {
283       TFMLOGD("Result is wrong! %d, %d", out_ranking[0], out_ranking[1]);
284       return false;
285     }
286   } else {
287     return false;
288   }
289 }
290 
hexagon_controller_DumpPerf(uint32_t nn_id)291 void hexagon_controller_DumpPerf(uint32_t nn_id) {
292   hexagon_nn_perfinfo info[MAX_NODES];
293   unsigned long long int total_cycles = 0;
294   unsigned long long int cum_cycles = 0;
295   unsigned long long int counter = 0;
296   unsigned int n_nodes;
297   int i;
298   TFMLOGD("Perf dump follows:");
299   if (hexagon_nn_get_perfinfo(nn_id, info, MAX_NODES, &n_nodes) != 0) {
300     TFMLOGE("perf info failure");
301     return;
302   }
303   TFMLOGD("Total %d nodes.", n_nodes);
304   qsort(info, n_nodes, sizeof(info[0]), CompareCycle);
305   for (i = 0; i < n_nodes; i++) {
306     total_cycles += GetCounter(info[i]);
307   }
308   TFMLOGD("Total %lld cycles.", total_cycles);
309   for (i = 0; i < n_nodes; i++) {
310     counter = GetCounter(info[i]);
311     cum_cycles += counter;
312     TFMLOGD(
313         "node,0x%x,%s,%s,executions,%d,cycles,%lld,%f %%,"
314         "cum_cycles,%lld,%f %%\n",
315         info[i].node_id, ConvertGraphInfoIdToName(info[i].node_id),
316         ConvertGraphInfoIdToOpName(info[i].node_id), info[i].executions,
317         counter, 100 * ((double)counter) / total_cycles, cum_cycles,
318         100 * ((double)cum_cycles) / total_cycles);
319   }
320 #ifdef ENABLE_HVX_FULL_DEBUG
321   DumpAllPerf(nn_id);
322 #endif
323 }
324 
hexagon_controller_DumpNodeName(uint32_t nn_id)325 void hexagon_controller_DumpNodeName(uint32_t nn_id) {
326   TFMLOGD("Show node name");
327   const uint32_t id = nn_id;
328   hexagon_nn_perfinfo info[MAX_NODES];
329   unsigned long long int total_cycles = 0;
330   unsigned long long int cum_cycles = 0;
331   unsigned long long int counter = 0;
332   unsigned int node_count;
333   int i;
334   TFMLOGD("Perf dump follows:");
335   if (hexagon_nn_get_perfinfo(id, info, MAX_NODES, &node_count) != 0) {
336     TFMLOGD("perf info failure");
337     return;
338   }
339   TFMLOGD("Total %d nodes.", node_count);
340   qsort(info, node_count, sizeof(info[0]), CompareCycle);
341   for (i = 0; i < node_count; i++) {
342     total_cycles += GetCounter(info[i]);
343   }
344   TFMLOGD("Total %lld cycles.", total_cycles);
345   for (i = 0; i < node_count; i++) {
346     counter = GetCounter(info[i]);
347     cum_cycles += counter;
348     TFMLOGD(
349         "node,0x%x,%s,%s,executions,%d,cycles,%lld,%f %%,"
350         "cum_cycles,%lld,%f %%",
351         info[i].node_id, ConvertGraphInfoIdToName(info[i].node_id),
352         ConvertGraphInfoIdToOpName(info[i].node_id), info[i].executions,
353         counter, 100 * ((double)counter) / total_cycles, cum_cycles,
354         100 * ((double)cum_cycles) / total_cycles);
355   }
356 }
357 
// Tears down graph `nn_id` by calling hexagon_nn_teardown(); any value that
// call returns is not inspected.
void hexagon_controller_Teardown(uint32_t nn_id) {
  hexagon_nn_teardown(nn_id);
}
359