1 // Copyright 2019 Google LLC
2 //
3 // This source code is licensed under the BSD-style license found in the
4 // LICENSE file in the root directory of this source tree.
5
6 #include <assert.h>
7 #include <math.h>
8 #include <stdbool.h>
9 #include <stddef.h>
10 #include <stdint.h>
11 #include <stdlib.h>
12 #include <string.h>
13
14 #include <xnnpack.h>
15 #include <xnnpack/allocator.h>
16 #include <xnnpack/operator.h>
17 #include <xnnpack/log.h>
18 #include <xnnpack/common.h>
19 #include <xnnpack/math.h>
20 #include <xnnpack/params.h>
21 #include <xnnpack/indirection.h>
22
// Creates an operator of type xnn_operator_type_resize_bilinear_nhwc_f32.
//
// Validates the arguments, allocates a zero-initialized operator descriptor,
// records the channel count, both pixel strides and the flags in it, and hands
// the descriptor back through resize_op_out. The new operator starts in the
// xnn_run_state_invalid state.
//
// Returns:
//   xnn_status_success            - the operator was created and stored in *resize_op_out.
//   xnn_status_uninitialized      - XNN_INIT_FLAG_XNNPACK is not set in xnn_params.init_flags.
//   xnn_status_invalid_parameter  - channels is zero, or either pixel stride is smaller
//                                   than channels.
//   xnn_status_out_of_memory      - the operator descriptor could not be allocated.
//
// On any failure *resize_op_out is left untouched.
enum xnn_status xnn_create_resize_bilinear2d_nhwc_f32(
    size_t channels,
    size_t input_pixel_stride,
    size_t output_pixel_stride,
    uint32_t flags,
    xnn_operator_t* resize_op_out)
{
  xnn_operator_t op = NULL;
  enum xnn_status status = xnn_status_uninitialized;

  if (!(xnn_params.init_flags & XNN_INIT_FLAG_XNNPACK)) {
    xnn_log_error("failed to create %s operator: XNNPACK is not initialized",
      xnn_operator_type_to_string(xnn_operator_type_resize_bilinear_nhwc_f32));
    goto fail;
  }

  // Argument validation: every failure below is reported as an invalid parameter.
  if (channels == 0) {
    xnn_log_error(
      "failed to create %s operator with %zu channels: number of channels must be non-zero",
      xnn_operator_type_to_string(xnn_operator_type_resize_bilinear_nhwc_f32), channels);
    status = xnn_status_invalid_parameter;
    goto fail;
  }

  if (input_pixel_stride < channels) {
    xnn_log_error(
      "failed to create %s operator with input pixel stride of %zu: "
      "stride must be at least as large as the number of channels (%zu)",
      xnn_operator_type_to_string(xnn_operator_type_resize_bilinear_nhwc_f32), input_pixel_stride, channels);
    status = xnn_status_invalid_parameter;
    goto fail;
  }

  if (output_pixel_stride < channels) {
    xnn_log_error(
      "failed to create %s operator with output pixel stride of %zu: "
      "stride must be at least as large as the number of channels (%zu)",
      xnn_operator_type_to_string(xnn_operator_type_resize_bilinear_nhwc_f32), output_pixel_stride, channels);
    status = xnn_status_invalid_parameter;
    goto fail;
  }

  // The descriptor is zero-filled, so every field not set below starts out as zero/NULL.
  op = xnn_allocate_zero_simd_memory(sizeof(struct xnn_operator));
  if (op == NULL) {
    xnn_log_error(
      "failed to allocate %zu bytes for %s operator descriptor",
      sizeof(struct xnn_operator), xnn_operator_type_to_string(xnn_operator_type_resize_bilinear_nhwc_f32));
    status = xnn_status_out_of_memory;
    goto fail;
  }

  op->type = xnn_operator_type_resize_bilinear_nhwc_f32;
  op->flags = flags;
  op->state = xnn_run_state_invalid;

  op->channels = channels;
  op->input_pixel_stride = input_pixel_stride;
  op->output_pixel_stride = output_pixel_stride;

  *resize_op_out = op;
  return xnn_status_success;

fail:
  // op is still NULL on every path that reaches this label.
  xnn_delete_operator(op);
  return status;
}
90
// Prepares a bilinear resize operator to run on a specific input/output geometry.
//
// The operator caches the geometry it was last set up with (last_input_height,
// last_input_width, last_output_height, last_output_width) together with the
// input pointer used to build the indirection buffer (last_input):
//   - the indirection buffer and packed weights are re-allocated only when the
//     number of output pixels changes;
//   - they are re-initialized only when any of the four dimensions changes;
//   - a different input pointer with unchanged dimensions is handled through
//     the byte offset between input and last_input.
//
// Returns:
//   xnn_status_success                - the operator is ready to run, or is marked to be
//                                       skipped when batch_size is zero.
//   xnn_status_invalid_parameter      - operator type mismatch, or a zero input/output dimension.
//   xnn_status_uninitialized          - XNN_INIT_FLAG_XNNPACK is not set in xnn_params.init_flags.
//   xnn_status_unsupported_parameter  - an input or output dimension is 2**24 or larger.
//   xnn_status_out_of_memory          - a buffer allocation failed.
//
// On every failure after the type check the operator is left in the
// xnn_run_state_invalid state.
enum xnn_status xnn_setup_resize_bilinear2d_nhwc_f32(
    xnn_operator_t resize_op,
    size_t batch_size,
    size_t input_height,
    size_t input_width,
    size_t output_height,
    size_t output_width,
    const float* input,
    float* output,
    pthreadpool_t threadpool)
{
  if (resize_op->type != xnn_operator_type_resize_bilinear_nhwc_f32) {
    xnn_log_error("failed to setup operator: operator type mismatch (expected %s, got %s)",
      xnn_operator_type_to_string(xnn_operator_type_resize_bilinear_nhwc_f32),
      xnn_operator_type_to_string(resize_op->type));
    return xnn_status_invalid_parameter;
  }
  resize_op->state = xnn_run_state_invalid;

  if ((xnn_params.init_flags & XNN_INIT_FLAG_XNNPACK) == 0) {
    xnn_log_error("failed to setup %s operator: XNNPACK is not initialized",
      xnn_operator_type_to_string(xnn_operator_type_resize_bilinear_nhwc_f32));
    return xnn_status_uninitialized;
  }

  if (input_width == 0 || input_height == 0) {
    xnn_log_error(
      "failed to setup %s operator with %zux%zu input: input dimensions must be non-zero",
      xnn_operator_type_to_string(xnn_operator_type_resize_bilinear_nhwc_f32), input_width, input_height);
    return xnn_status_invalid_parameter;
  }

  if (max(input_width, input_height) >= 16777216) {
    xnn_log_error(
      "failed to setup %s operator with %zux%zu input: input dimensions must be below 2**24",
      xnn_operator_type_to_string(xnn_operator_type_resize_bilinear_nhwc_f32), input_width, input_height);
    return xnn_status_unsupported_parameter;
  }

  if (output_width == 0 || output_height == 0) {
    xnn_log_error(
      "failed to setup %s operator with %zux%zu output: output dimensions must be non-zero",
      xnn_operator_type_to_string(xnn_operator_type_resize_bilinear_nhwc_f32), output_width, output_height);
    return xnn_status_invalid_parameter;
  }

  if (max(output_width, output_height) >= 16777216) {
    xnn_log_error(
      "failed to setup %s operator with %zux%zu output: output dimensions must be below 2**24",
      xnn_operator_type_to_string(xnn_operator_type_resize_bilinear_nhwc_f32), output_width, output_height);
    return xnn_status_unsupported_parameter;
  }

  if (batch_size == 0) {
    resize_op->state = xnn_run_state_skip;
    return xnn_status_success;
  }

  // Buffer sizes depend only on the number of output pixels, so re-allocate
  // only when that count differs from the one used in the previous setup.
  // NOTE(review): these products are computed in size_t without overflow checks;
  // with dimensions up to 2**24 - 1 they can exceed 32 bits - confirm that
  // targets with a 32-bit size_t are safe.
  if (output_height * output_width != resize_op->last_output_height * resize_op->last_output_width) {
    const size_t indirection_buffer_size = sizeof(void*) * (output_height * output_width * 4);
    const size_t packed_weights_size = sizeof(float) * (output_height * output_width * 2);

    const void** indirection_buffer = (const void**) xnn_reallocate_memory(resize_op->indirection_buffer, indirection_buffer_size);
    if (indirection_buffer == NULL) {
      // The previous buffers were not replaced, so the cached geometry is left as-is.
      xnn_log_error(
        "failed to allocate %zu bytes for %s operator indirection buffer",
        indirection_buffer_size, xnn_operator_type_to_string(xnn_operator_type_resize_bilinear_nhwc_f32));
      return xnn_status_out_of_memory;
    }
    resize_op->indirection_buffer = indirection_buffer;

    // From here on the buffers no longer match the cached geometry: the
    // indirection buffer has a new size and the packed weights are about to be
    // released. Forget the cached dimensions so that, if the allocation below
    // fails, a later setup cannot skip re-allocation/re-initialization and run
    // with NULL packed weights or a mis-sized indirection buffer. Valid
    // dimensions are always non-zero, so zero never matches a real geometry.
    resize_op->last_input_height = 0;
    resize_op->last_input_width = 0;
    resize_op->last_output_height = 0;
    resize_op->last_output_width = 0;

    // Note: packed weights must be SIMD-aligned, so we can't use xnn_reallocate_memory
    xnn_release_simd_memory(resize_op->packed_weights);
    resize_op->packed_weights = xnn_allocate_simd_memory(packed_weights_size);
    if (resize_op->packed_weights == NULL) {
      xnn_log_error(
        "failed to allocate %zu bytes for %s operator packed weights",
        packed_weights_size, xnn_operator_type_to_string(xnn_operator_type_resize_bilinear_nhwc_f32));
      return xnn_status_out_of_memory;
    }
  }

  const size_t input_pixel_stride_in_bytes = resize_op->input_pixel_stride * sizeof(float);
  if (input_height != resize_op->last_input_height ||
      input_width != resize_op->last_input_width ||
      output_height != resize_op->last_output_height ||
      output_width != resize_op->last_output_width)
  {
    // Geometry changed: rebuild the indirection buffer and packed weights
    // relative to the current input pointer, then remember this configuration.
    const uint32_t flags = resize_op->flags;
    xnn_indirection_init_resize_bilinear2d_hwc_f32(
      input_pixel_stride_in_bytes,
      input_height, input_width,
      output_height, output_width,
      input, resize_op->indirection_buffer, resize_op->packed_weights,
      !!(flags & XNN_FLAG_ALIGN_CORNERS),
      !!(flags & XNN_FLAG_TENSORFLOW_LEGACY_MODE));

    resize_op->last_input = input;
    resize_op->last_input_height = input_height;
    resize_op->last_input_width = input_width;
    resize_op->last_output_height = output_height;
    resize_op->last_output_width = output_width;
  }

  const size_t output_pixel_stride_in_bytes = resize_op->output_pixel_stride * sizeof(float);
  resize_op->context.resize_bilinear = (struct resize_bilinear_context) {
    .scaled_channels = resize_op->channels * sizeof(float),
    .indirect_input = resize_op->indirection_buffer,
    // The indirection buffer was built against last_input; this byte offset
    // rebases it onto the input pointer passed to this call.
    .input_offset = (size_t) ((uintptr_t) input - (uintptr_t) resize_op->last_input),
    .input_batch_stride = input_pixel_stride_in_bytes * input_height * input_width,
    .packed_weights = resize_op->packed_weights,
    .output = output,
    .output_pixel_stride = output_pixel_stride_in_bytes,
    .output_batch_stride = output_pixel_stride_in_bytes * output_height * output_width,
    .log2_wsize = 3 /* log2(2 * sizeof(float)) */,
    .ukernel = xnn_params.f32.ibilinear.ukernel,
  };

  // Choose the tile size along the output-pixel dimension. With a single thread
  // the whole output is one tile; with several threads the tile is derived from
  // a target of 5 tiles per thread and the micro-kernel's pixel tile.
  const size_t output_size = output_height * output_width;
  size_t output_size_tile = output_size;
  const size_t num_threads = pthreadpool_get_threads_count(threadpool);
  if (num_threads > 1) {
    const size_t target_tiles_per_thread = 5;
    const size_t max_output_size_tile = divide_round_up(output_size, num_threads * target_tiles_per_thread);
    if (max_output_size_tile < output_size_tile) {
      const uint32_t output_size_subtile = xnn_params.f32.ibilinear.pixel_tile;
      output_size_tile =
        min(output_size_tile,
          divide_round_up(output_size_tile, max_output_size_tile * output_size_subtile) * output_size_subtile);
    }
  }
  resize_op->compute.type = xnn_parallelization_type_2d_tile_1d;
  resize_op->compute.task_2d_tile_1d = (pthreadpool_task_2d_tile_1d_t) xnn_compute_resize_bilinear;
  resize_op->compute.range[0] = batch_size;
  resize_op->compute.range[1] = output_size;
  resize_op->compute.tile[0] = output_size_tile;
  resize_op->state = xnn_run_state_ready;

  return xnn_status_success;
}
231