1 // Copyright 2020 Google LLC
2 //
3 // This source code is licensed under the BSD-style license found in the
4 // LICENSE file in the root directory of this source tree.
5
#include <assert.h>
#include <math.h>
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

#include <xnnpack.h>
#include <xnnpack/allocator.h>
#include <xnnpack/log.h>
#include <xnnpack/operator.h>
#include <xnnpack/params-init.h>
#include <xnnpack/params.h>
18
19
// Allocates a zero-initialized operator descriptor and configures it as a
// constant-pad operator of the requested type.
//
// Parameters:
//   padding_value       - 32-bit pattern stored in the operator's pad_value field.
//   flags               - accepted for interface symmetry; not used by this function.
//   operator_type       - stored as the operator's type and used in log messages.
//   constant_pad_op_out - receives the new operator on success; left untouched on failure.
//
// Returns:
//   xnn_status_success        - the operator was created.
//   xnn_status_uninitialized  - XNN_INIT_FLAG_XNNPACK is not set in xnn_params.init_flags.
//   xnn_status_out_of_memory  - the descriptor allocation failed.
static enum xnn_status create_constant_pad_nd(
    uint32_t padding_value,
    uint32_t flags,
    enum xnn_operator_type operator_type,
    xnn_operator_t* constant_pad_op_out)
{
  xnn_operator_t constant_pad_op = NULL;
  enum xnn_status status = xnn_status_uninitialized;

  if ((xnn_params.init_flags & XNN_INIT_FLAG_XNNPACK) == 0) {
    // Report the type the caller asked for, not a hard-coded one.
    xnn_log_error(
      "failed to create %s operator: XNNPACK is not initialized",
      xnn_operator_type_to_string(operator_type));
    goto error;
  }

  status = xnn_status_out_of_memory;

  constant_pad_op = xnn_allocate_zero_simd_memory(sizeof(struct xnn_operator));
  if (constant_pad_op == NULL) {
    xnn_log_error(
      "failed to allocate %zu bytes for %s operator descriptor",
      sizeof(struct xnn_operator), xnn_operator_type_to_string(operator_type));
    goto error;
  }

  constant_pad_op->pad_value = padding_value;

  constant_pad_op->type = operator_type;

  // The operator cannot be run until a setup call marks it ready.
  constant_pad_op->state = xnn_run_state_invalid;

  *constant_pad_op_out = constant_pad_op;
  return xnn_status_success;

error:
  // constant_pad_op is still NULL here when the initialization check failed.
  xnn_delete_operator(constant_pad_op);
  return status;
}
59
// Creates a constant-pad operator for 32-bit elements.
//
// Parameters:
//   padding_value       - pointer to the 32-bit padding element; exactly
//                         sizeof(uint32_t) bytes are read from it. Must not be NULL.
//   flags               - forwarded to create_constant_pad_nd.
//   constant_pad_op_out - receives the created operator on success.
//
// Returns the status produced by create_constant_pad_nd.
enum xnn_status xnn_create_constant_pad_nd_x32(
    const void* padding_value,
    uint32_t flags,
    xnn_operator_t* constant_pad_op_out)
{
  // Copy the bits instead of dereferencing a casted pointer: the caller's
  // object may be a float (or any other 32-bit type) and need not be aligned
  // for uint32_t, and the const qualifier must not be cast away.
  uint32_t padding_bits;
  memcpy(&padding_bits, padding_value, sizeof(padding_bits));

  return create_constant_pad_nd(
    padding_bits, flags, xnn_operator_type_constant_pad_nd_x32, constant_pad_op_out);
}
68
// Validates the arguments and fills in the pad context and parallelization
// parameters of a previously created constant-pad operator.
//
// Parameters:
//   constant_pad_op        - operator to set up; its type must equal expected_operator_type.
//   expected_operator_type - operator type required by the calling entry point.
//   num_dims               - number of entries in input_shape, pre_paddings and post_paddings;
//                            must not exceed XNN_MAX_TENSOR_DIMS.
//   input_shape            - input extents; every entry must be non-zero.
//   pre_paddings           - per-dimension padding placed before the input data.
//   post_paddings          - per-dimension padding placed after the input data.
//   input, output          - data pointers recorded in the pad context.
//   num_threads            - accepted but not used by this function.
//
// Returns:
//   xnn_status_success               - the operator state was set to ready.
//   xnn_status_invalid_parameter     - operator type mismatch or a zero input dimension.
//   xnn_status_uninitialized         - XNN_INIT_FLAG_XNNPACK is not set.
//   xnn_status_unsupported_parameter - num_dims exceeds XNN_MAX_TENSOR_DIMS.
static enum xnn_status setup_constant_pad_nd(
    xnn_operator_t constant_pad_op,
    enum xnn_operator_type expected_operator_type,
    size_t num_dims,
    const size_t* input_shape,
    const size_t* pre_paddings,
    const size_t* post_paddings,
    const void* input,
    void* output,
    size_t num_threads)
{
  if (constant_pad_op->type != expected_operator_type) {
    xnn_log_error("failed to setup operator: operator type mismatch (expected %s, got %s)",
      xnn_operator_type_to_string(expected_operator_type),
      xnn_operator_type_to_string(constant_pad_op->type));
    return xnn_status_invalid_parameter;
  }

  // From here on the operator stays invalid unless setup runs to completion.
  constant_pad_op->state = xnn_run_state_invalid;

  if ((xnn_params.init_flags & XNN_INIT_FLAG_XNNPACK) == 0) {
    xnn_log_error("failed to setup %s operator: XNNPACK is not initialized",
      xnn_operator_type_to_string(constant_pad_op->type));
    return xnn_status_uninitialized;
  }

  if (num_dims > XNN_MAX_TENSOR_DIMS) {
    xnn_log_error(
      "failed to setup %s operator with %zu dimensions in input shape: "
      "the number of input dimensions must not exceed %d",
      xnn_operator_type_to_string(constant_pad_op->type), num_dims, XNN_MAX_TENSOR_DIMS);
    return xnn_status_unsupported_parameter;
  }

  for (size_t dim = 0; dim < num_dims; dim++) {
    if (input_shape[dim] == 0) {
      xnn_log_error(
        "failed to setup %s operator: input shape dimension #%zu is zero",
        xnn_operator_type_to_string(constant_pad_op->type), dim);
      return xnn_status_invalid_parameter;
    }
  }

  // Normalized description of the problem. The arrays are filled from the
  // back; unused leading slots keep extent 1 and pre-padding 0.
  size_t squeezed_count = 0;
  size_t pre_pad_norm[XNN_MAX_TENSOR_DIMS];
  size_t in_shape_norm[XNN_MAX_TENSOR_DIMS];
  size_t out_shape_norm[XNN_MAX_TENSOR_DIMS];
  for (size_t slot = 0; slot < XNN_MAX_TENSOR_DIMS; slot++) {
    pre_pad_norm[slot] = 0;
    in_shape_norm[slot] = 1;
    out_shape_norm[slot] = 1;
  }

  // Visit the caller's dimensions from last to first. An unpadded dimension
  // that directly follows another unpadded one is folded into the slot
  // recorded most recently; every other dimension gets a slot of its own.
  // Starting with "padded" guarantees the last dimension opens a slot.
  bool last_dim_was_padded = true;
  for (size_t remaining = num_dims; remaining != 0; remaining--) {
    const size_t src = remaining - 1;
    const size_t lead = pre_paddings[src];
    const size_t trail = post_paddings[src];
    const size_t extent = input_shape[src];
    const bool padded = (lead | trail) != 0;

    if (!padded && !last_dim_was_padded) {
      assert(!last_dim_was_padded);
      assert(lead == 0);
      assert(trail == 0);
      assert(remaining != num_dims);

      // Index of the slot written by the most recent non-merged dimension.
      const size_t merged_slot = XNN_MAX_TENSOR_DIMS - squeezed_count;
      in_shape_norm[merged_slot] *= extent;
      out_shape_norm[merged_slot] *= extent;
      continue;
    }

    const size_t new_slot = XNN_MAX_TENSOR_DIMS - 1 - squeezed_count;
    pre_pad_norm[new_slot] = lead;
    in_shape_norm[new_slot] = extent;
    out_shape_norm[new_slot] = lead + extent + trail;

    squeezed_count += 1;
    last_dim_was_padded = padded;
  }

  struct pad_context* ctx = &constant_pad_op->context.pad;
  *ctx = (struct pad_context) {
    .input = input,
    .output = output,
    .padding_value = constant_pad_op->pad_value,
    .fill_ukernel = xnn_params.x32.fill.ukernel,
    .pad_ukernel = xnn_params.x32.pad.ukernel,
  };

  // The context stores dimensions in reverse order: index 0 is the last
  // normalized dimension.
  for (size_t k = 0; k < XNN_MAX_TENSOR_DIMS; k++) {
    const size_t mirrored = XNN_MAX_TENSOR_DIMS - 1 - k;
    ctx->pre_paddings[k] = pre_pad_norm[mirrored];
    ctx->input_size[k] = in_shape_norm[mirrored];
  }

  // Accumulate element strides from the last dimension outwards, store them
  // in bytes (sizeof(float) is used as the element size), and move the input
  // address back by the pre-padding of every dimension except index 0.
  size_t in_stride_elems = in_shape_norm[XNN_MAX_TENSOR_DIMS - 1];
  size_t out_stride_elems = out_shape_norm[XNN_MAX_TENSOR_DIMS - 1];
  uintptr_t shifted_input = (uintptr_t) ctx->input;
  for (size_t k = 1; k < XNN_MAX_TENSOR_DIMS; k++) {
    shifted_input -= ctx->pre_paddings[k] * in_stride_elems * sizeof(float);
    ctx->input_stride[k - 1] = in_stride_elems * sizeof(float);
    ctx->output_stride[k - 1] = out_stride_elems * sizeof(float);
    in_stride_elems *= in_shape_norm[XNN_MAX_TENSOR_DIMS - 1 - k];
    out_stride_elems *= out_shape_norm[XNN_MAX_TENSOR_DIMS - 1 - k];
  }
  ctx->input = (const void*) shifted_input;

  // Entries at index 0 are converted from elements to bytes; the post-padding
  // at index 0 is whatever remains of the output row.
  ctx->input_size[0] *= sizeof(float);
  ctx->output_size[0] = out_shape_norm[XNN_MAX_TENSOR_DIMS - 1] * sizeof(float);
  ctx->pre_paddings[0] *= sizeof(float);
  ctx->post_paddings[0] = ctx->output_size[0] - ctx->pre_paddings[0] - ctx->input_size[0];

  // Parallelize over the first five normalized output dimensions.
  constant_pad_op->compute.type = xnn_parallelization_type_5d;
  constant_pad_op->compute.task_5d = (pthreadpool_task_5d_t) xnn_compute_pad_5d;
  for (size_t r = 0; r < 5; r++) {
    constant_pad_op->compute.range[r] = out_shape_norm[r];
  }
  constant_pad_op->state = xnn_run_state_ready;

  return xnn_status_success;
}
185
// Sets up a 32-bit constant-pad operator for the given input shape, paddings
// and data pointers.
//
// Parameters:
//   constant_pad_op - operator to set up; it must have the constant_pad_nd_x32 type.
//   num_dims        - number of entries in input_shape, pre_padding and post_padding.
//   input_shape     - input extents.
//   pre_padding     - per-dimension padding before the input data.
//   post_padding    - per-dimension padding after the input data.
//   input, output   - data pointers.
//   threadpool      - queried for its thread count, which is passed along.
//
// Returns the status produced by setup_constant_pad_nd.
enum xnn_status xnn_setup_constant_pad_nd_x32(
    xnn_operator_t constant_pad_op,
    size_t num_dims,
    const size_t* input_shape,
    const size_t* pre_padding,
    const size_t* post_padding,
    const void* input,
    void* output,
    pthreadpool_t threadpool)
{
  const size_t thread_count = pthreadpool_get_threads_count(threadpool);

  return setup_constant_pad_nd(
    constant_pad_op,
    xnn_operator_type_constant_pad_nd_x32,
    num_dims,
    input_shape,
    pre_padding,
    post_padding,
    input,
    output,
    thread_count);
}
202