1 // Copyright 2020 Google LLC
2 //
3 // This source code is licensed under the BSD-style license found in the
4 // LICENSE file in the root directory of this source tree.
5 
6 #pragma once
7 
8 #include <stddef.h>
9 #include <stdint.h>
10 
11 #include <xnnpack.h>
12 
13 #define XNN_MAX_INPUTS 3
14 #define XNN_MAX_OUTPUTS 2
15 
16 #define XNN_MAX_RUNTIME_INPUTS 2
17 #define XNN_MAX_RUNTIME_OUTPUTS 2
18 
19 #define XNN_INVALID_NODE_ID UINT32_MAX
20 
21 #ifdef __cplusplus
22 extern "C" {
23 #endif
24 
25 struct xnn_shape {
26   size_t num_dims;
27   size_t dim[XNN_MAX_TENSOR_DIMS];
28 };
29 
30 enum xnn_value_type {
31   xnn_value_type_invalid = 0,
32   xnn_value_type_dense_tensor = 1,
33 };
34 
35 enum xnn_layout_type {
36   xnn_layout_type_nhwc = 0,
37   xnn_layout_type_nchw = 1,
38 };
39 
40 /// Abstraction for a collections of elements produced and consumed by nodes.
41 struct xnn_value {
42   /// Unique ID for the value.
43   uint32_t id;
44   /// Type of the collection of elements.
45   ///
46   /// Currently only dense tensors are supported.
47   /// Other types (e.g. sparse tensors) might be supported in the future.
48   enum xnn_value_type type;
49   /// Type of elements in the collection.
50   enum xnn_datatype datatype;
51   /// Tensor shape.
52   struct xnn_shape shape;
53   /// Binary features of the tensor. Supported values are any combination of:
54   /// - XNN_VALUE_FLAG_EXTERNAL_INPUT
55   /// - XNN_VALUE_FLAG_EXTERNAL_OUTPUT
56   uint32_t flags;
57   /// Static initialization data. Must be null for non-static values.
58   const void* data;
59   /// Index of the Subgraph node that produced the value, or XNN_INVALID_NODE_ID is the Value is an external input.
60   uint32_t producer;
61   /// Index of the first Node that consume the value, or XNN_INVALID_NODE_ID if the Value has no consumers within the
62   /// graph (e.g. Value is an external output).
63   uint32_t first_consumer;
64   /// Number of Nodes that consume the value.
65   /// If multiple inputs in a Node refer to this Value as input, the Node is counted as consumer multiple times.
66   /// If the Value is an external output, it counts as having an extra consumer.
67   uint32_t num_consumers;
68   uint32_t num_nchw_compatible_consumers;
69   enum xnn_layout_type layout;
70 };
71 
72 struct xnn_blob {
73   /// Size in bytes.
74   size_t size;
75   /// Data pointer.
76   void* data;
77   bool external;
78 };
79 
80 enum xnn_node_type {
81   xnn_node_type_invalid = 0,
82   xnn_node_type_abs,
83   xnn_node_type_add2,
84   xnn_node_type_argmax_pooling_2d,
85   xnn_node_type_average_pooling_2d,
86   xnn_node_type_bankers_rounding,
87   xnn_node_type_ceiling,
88   xnn_node_type_clamp,
89   xnn_node_type_convolution_2d,
90   xnn_node_type_deconvolution_2d,
91   xnn_node_type_depthwise_convolution_2d,
92   xnn_node_type_depth_to_space,
93   xnn_node_type_divide,
94   xnn_node_type_elu,
95   xnn_node_type_fully_connected,
96   xnn_node_type_floor,
97   xnn_node_type_global_average_pooling_2d,
98   xnn_node_type_hardswish,
99   xnn_node_type_leaky_relu,
100   xnn_node_type_max_pooling_2d,
101   xnn_node_type_maximum2,
102   xnn_node_type_minimum2,
103   xnn_node_type_multiply2,
104   xnn_node_type_negate,
105   xnn_node_type_prelu,
106   xnn_node_type_sigmoid,
107   xnn_node_type_softmax,
108   xnn_node_type_static_constant_pad,
109   xnn_node_type_static_reshape,
110   xnn_node_type_static_resize_bilinear_2d,
111   xnn_node_type_square,
112   xnn_node_type_square_root,
113   xnn_node_type_squared_difference,
114   xnn_node_type_subtract,
115   xnn_node_type_unpooling_2d,
116 };
117 
118 struct xnn_node {
119   enum xnn_node_type type;
120   uint32_t id;
121   /// Static parameters of the operator node.
122   union {
123     struct {
124       uint32_t input_padding_top;
125       uint32_t input_padding_right;
126       uint32_t input_padding_bottom;
127       uint32_t input_padding_left;
128       uint32_t kernel_height;
129       uint32_t kernel_width;
130       uint32_t subsampling_height;
131       uint32_t subsampling_width;
132       uint32_t dilation_height;
133       uint32_t dilation_width;
134       uint32_t groups;
135       size_t group_input_channels;
136       size_t group_output_channels;
137     } convolution_2d;
138     struct {
139       uint32_t padding_top;
140       uint32_t padding_right;
141       uint32_t padding_bottom;
142       uint32_t padding_left;
143       uint32_t adjustment_height;
144       uint32_t adjustment_width;
145       uint32_t kernel_height;
146       uint32_t kernel_width;
147       uint32_t upsampling_height;
148       uint32_t upsampling_width;
149       uint32_t dilation_height;
150       uint32_t dilation_width;
151       uint32_t groups;
152       size_t group_input_channels;
153       size_t group_output_channels;
154     } deconvolution_2d;
155     struct {
156       uint32_t input_padding_top;
157       uint32_t input_padding_right;
158       uint32_t input_padding_bottom;
159       uint32_t input_padding_left;
160       uint32_t kernel_height;
161       uint32_t kernel_width;
162       uint32_t subsampling_height;
163       uint32_t subsampling_width;
164       uint32_t dilation_height;
165       uint32_t dilation_width;
166       uint32_t depth_multiplier;
167       size_t input_channels;
168     } depthwise_convolution_2d;
169     struct {
170       uint32_t block_size;
171     } depth_to_space;
172     struct {
173       uint32_t padding_top;
174       uint32_t padding_right;
175       uint32_t padding_bottom;
176       uint32_t padding_left;
177       uint32_t pooling_height;
178       uint32_t pooling_width;
179       uint32_t stride_height;
180       uint32_t stride_width;
181       uint32_t dilation_height;
182       uint32_t dilation_width;
183     } pooling_2d;
184     struct {
185       float alpha;
186     } elu;
187     struct {
188       float negative_slope;
189     } leaky_relu;
190     struct {
191       size_t pre_paddings[XNN_MAX_TENSOR_DIMS];
192       size_t post_paddings[XNN_MAX_TENSOR_DIMS];
193       uint32_t padding_value;
194     } static_pad;
195     struct {
196       struct xnn_shape new_shape;
197     } static_reshape;
198     struct {
199       size_t new_height;
200       size_t new_width;
201     } static_resize;
202   } params;
203   struct {
204     float output_min;
205     float output_max;
206   } activation;
207   /// Value IDs for node inputs.
208   uint32_t inputs[XNN_MAX_INPUTS];
209   uint32_t num_inputs;
210   /// Value IDs for node outputs.
211   uint32_t outputs[XNN_MAX_OUTPUTS];
212   uint32_t num_outputs;
213   uint32_t flags;
214   uint32_t layout_flags;
215   uint32_t cluster_leader;
216   // Number of filter parameters in all 1x1 Convolutions of the sparse cluster.
217   // This value is properly initialized only in sparse inference analysis of 1x1 Convolutions.
218   size_t num_params;
219   // Number of zero filter parameters in all 1x1 Convolutions of the sparse cluster.
220   // This value is properly initialized only in sparse inference analysis of 1x1 Convolutions.
221   size_t num_zeroes;
222 };
223 
224 struct xnn_operator_data {
225   xnn_operator_t operator_object;
226   size_t batch_size;
227   size_t input_height;
228   size_t input_width;
229   size_t output_height;
230   size_t output_width;
231   struct xnn_shape shape1;
232   struct xnn_shape shape2;
233   size_t pre_paddings[XNN_MAX_TENSOR_DIMS];
234   size_t post_paddings[XNN_MAX_TENSOR_DIMS];
235   uint32_t adjustment_height;
236   uint32_t adjustment_width;
237   uint32_t inputs[XNN_MAX_RUNTIME_INPUTS];
238   uint32_t outputs[XNN_MAX_RUNTIME_OUTPUTS];
239 };
240 
241 struct xnn_subgraph {
242   /// Number of Value IDs reserved for communication with external graph representation.
243   /// Values created during subgraph transformation avoid using IDs in [0, reserved_value_ids-1] range.
244   uint32_t external_value_ids;
245 
246   uint32_t num_reserved_values;
247   uint32_t num_values;
248   struct xnn_value* values;
249 
250   uint32_t num_reserved_nodes;
251   uint32_t num_nodes;
252   struct xnn_node* nodes;
253 };
254 
255 /// Runtime is a combination of an execution plan for subgraph Nodes and a memory manager for subgraph Values.
256 struct xnn_runtime {
257   uint32_t num_external_values;
258 
259   /// List of operators in the execution plan, in execution order.
260   struct xnn_operator_data* opdata;
261   /// Number of operators in the execution plan.
262   size_t num_ops;
263 
264   struct xnn_blob* blobs;
265   size_t num_blobs;
266 
267   void* workspace;
268 
269   pthreadpool_t threadpool;
270 };
271 
272 struct xnn_value* xnn_subgraph_new_internal_value(xnn_subgraph_t subgraph);
273 
274 struct xnn_node* xnn_subgraph_new_node(xnn_subgraph_t subgraph);
275 
276 size_t xnn_tensor_get_size(
277   xnn_subgraph_t subgraph,
278   uint32_t value_id);
279 
280 enum xnn_status xnn_subgraph_optimize(xnn_subgraph_t subgraph, uint32_t flags);
281 
282 void xnn_subgraph_rewrite_for_nchw(xnn_subgraph_t subgraph);
283 
284 void xnn_node_clear(struct xnn_node* node);
285 void xnn_value_clear(struct xnn_value* value);
286 
287 
288 #ifdef __cplusplus
289 }  // extern "C"
290 #endif
291