// Copyright (c) Facebook, Inc. and its affiliates.
// All rights reserved.
//
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#include <assert.h>
#include <inttypes.h>
#include <math.h>
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>

#include <xnnpack.h>
#include <xnnpack/allocator.h>
#include <xnnpack/log.h>
#include <xnnpack/operator.h>


// Creates a Leaky ReLU operator for quantized unsigned 8-bit (QU8) data in
// NC layout.  All parameters are validated up front; each failure is logged
// via xnn_log_error and reported through the corresponding xnn_status code.
// On success, a heap-allocated operator with a precomputed 256-entry lookup
// table is stored in *leaky_relu_op_out and xnn_status_success is returned.
enum xnn_status xnn_create_leaky_relu_nc_qu8(
    size_t channels,
    size_t input_stride,
    size_t output_stride,
    float negative_slope,
    uint8_t input_zero_point,
    float input_scale,
    uint8_t output_zero_point,
    float output_scale,
    uint8_t output_min,
    uint8_t output_max,
    uint32_t flags,
    xnn_operator_t* leaky_relu_op_out)
{
  const enum xnn_operator_type op_type = xnn_operator_type_leaky_relu_nc_qu8;
  enum xnn_status status = xnn_status_uninitialized;
  xnn_operator_t op = NULL;

  if ((xnn_params.init_flags & XNN_INIT_FLAG_XNNPACK) == 0) {
    xnn_log_error("failed to create %s operator: XNNPACK is not initialized",
      xnn_operator_type_to_string(op_type));
    goto error;
  }

  status = xnn_status_invalid_parameter;

  if (channels == 0) {
    xnn_log_error(
      "failed to create %s operator with %zu channels: number of channels must be non-zero",
      xnn_operator_type_to_string(op_type), channels);
    goto error;
  }

  if (input_stride < channels) {
    xnn_log_error(
      "failed to create %s operator with input element stride of %zu: "
      "stride must be at least as large as the number of channels (%zu)",
      xnn_operator_type_to_string(op_type), input_stride, channels);
    goto error;
  }

  if (output_stride < channels) {
    xnn_log_error(
      "failed to create %s operator with output element stride of %zu: "
      "stride must be at least as large as the number of channels (%zu)",
      xnn_operator_type_to_string(op_type), output_stride, channels);
    goto error;
  }

  if (negative_slope <= 0.0f || !isnormal(negative_slope)) {
    xnn_log_error(
      "failed to create %s operator with %.7g negative slope: slope must be finite, normalized, and positive",
      xnn_operator_type_to_string(op_type), negative_slope);
    goto error;
  }

  if (negative_slope > 1.0f) {
    xnn_log_error(
      "failed to create %s operator with %.7g negative slope: slope must not exceed 1.0",
      xnn_operator_type_to_string(op_type), negative_slope);
    goto error;
  }

  if (input_scale <= 0.0f || !isnormal(input_scale)) {
    xnn_log_error(
      "failed to create %s operator with %.7g input scale: scale must be finite, normalized, and positive",
      xnn_operator_type_to_string(op_type), input_scale);
    goto error;
  }

  if (output_scale <= 0.0f || !isnormal(output_scale)) {
    xnn_log_error(
      "failed to create %s operator with %.7g output scale: scale must be finite, normalized, and positive",
      xnn_operator_type_to_string(op_type), output_scale);
    goto error;
  }

  if (output_min >= output_max) {
    xnn_log_error(
      "failed to create %s operator with [%" PRIu8 ", %" PRIu8 "] output range: range min must be below range max",
      xnn_operator_type_to_string(op_type), output_min, output_max);
    goto error;
  }

  status = xnn_status_unsupported_parameter;

  // The LUT-based implementation only supports a bounded ratio between the
  // input and output quantization scales.
  const float input_output_scale = input_scale / output_scale;
  if (input_output_scale < 0x1.0p-8f || input_output_scale >= 0x1.0p+8f) {
    xnn_log_error(
      "failed to create %s operator with %.7g input-to-output scale ratio: "
      "scale ratio must be in [2**-8, 2**8) range",
      xnn_operator_type_to_string(op_type), input_output_scale);
    goto error;
  }

  status = xnn_status_out_of_memory;

  op = xnn_allocate_zero_simd_memory(sizeof(struct xnn_operator));
  if (op == NULL) {
    xnn_log_error(
      "failed to allocate %zu bytes for %s operator descriptor",
      sizeof(struct xnn_operator), xnn_operator_type_to_string(op_type));
    goto error;
  }

  op->lookup_table = xnn_allocate_simd_memory(256 * sizeof(uint8_t));
  if (op->lookup_table == NULL) {
    xnn_log_error(
      "failed to allocate 256 bytes for %s operator lookup table",
      xnn_operator_type_to_string(op_type));
    goto error;
  }

  // Precompute the full input-byte -> output-byte mapping: dequantize each of
  // the 256 possible inputs, apply leaky ReLU, clamp to the output range
  // (expressed relative to the output zero point), then requantize.
  uint8_t* lut = op->lookup_table;
  const float requant_min = (float) ((int32_t) output_min - (int32_t) output_zero_point);
  const float requant_max = (float) ((int32_t) output_max - (int32_t) output_zero_point);
  for (int32_t i = 0; i < 256; i++) {
    float value = input_output_scale * (float) (i - (int32_t) (uint32_t) input_zero_point);
    if (value < 0.0f) {
      value *= negative_slope;
    }
    value = value < requant_min ? requant_min : value;
    value = value > requant_max ? requant_max : value;
    lut[(uint32_t) i] = (uint8_t) (lrintf(value) + (long) output_zero_point);
  }

  op->channels = channels;
  op->input_pixel_stride = input_stride;
  op->output_pixel_stride = output_stride;

  op->type = xnn_operator_type_leaky_relu_nc_qu8;
  op->state = xnn_run_state_invalid;

  *leaky_relu_op_out = op;
  return xnn_status_success;

error:
  // xnn_delete_operator tolerates a NULL operator, so all failure paths
  // funnel through here.
  xnn_delete_operator(op);
  return status;
}
// Binds input/output pointers and a batch size to a previously created QU8
// leaky-ReLU operator and selects the compute strategy for the next run.
// Returns xnn_status_success and marks the operator ready (or skip for an
// empty batch); on failure the operator is left in the invalid state.
enum xnn_status xnn_setup_leaky_relu_nc_qu8(
    xnn_operator_t leaky_relu_op,
    size_t batch_size,
    const uint8_t* input,
    uint8_t* output,
    pthreadpool_t threadpool)
{
  if (leaky_relu_op->type != xnn_operator_type_leaky_relu_nc_qu8) {
    xnn_log_error("failed to setup operator: operator type mismatch (expected %s, got %s)",
      xnn_operator_type_to_string(xnn_operator_type_leaky_relu_nc_qu8),
      xnn_operator_type_to_string(leaky_relu_op->type));
    return xnn_status_invalid_parameter;
  }
  // Invalidate first so an early failure below leaves the operator unrunnable.
  leaky_relu_op->state = xnn_run_state_invalid;

  if ((xnn_params.init_flags & XNN_INIT_FLAG_XNNPACK) == 0) {
    xnn_log_error(
      "failed to setup %s operator: XNNPACK is not initialized",
      xnn_operator_type_to_string(xnn_operator_type_leaky_relu_nc_qu8));
    return xnn_status_uninitialized;
  }

  if (batch_size == 0) {
    // Nothing to compute: mark this run as a no-op.
    leaky_relu_op->state = xnn_run_state_skip;
    return xnn_status_success;
  }

  const size_t channels = leaky_relu_op->channels;
  const size_t input_stride = leaky_relu_op->input_pixel_stride;
  const size_t output_stride = leaky_relu_op->output_pixel_stride;
  if ((input_stride == channels && output_stride == channels) || batch_size == 1) {
    // Rows are densely packed (or there is only one row), so the whole buffer
    // can be translated as a single contiguous byte stream in fixed-size tiles.
    const size_t tile_size = 1024;
    leaky_relu_op->context.lut_contiguous = (struct lut_contiguous_context) {
      .x = input,
      .x_stride = input_stride * sizeof(uint8_t),
      .t = leaky_relu_op->lookup_table,
      .y = output,
      .y_stride = output_stride * sizeof(uint8_t),
      .ukernel = xnn_params.x8.lut,
    };
    leaky_relu_op->compute.type = xnn_parallelization_type_1d_tile_1d;
    leaky_relu_op->compute.task_1d_tile_1d = (pthreadpool_task_1d_tile_1d_t) xnn_compute_lut_contiguous;
    leaky_relu_op->compute.range[0] = batch_size * channels * sizeof(uint8_t);
    leaky_relu_op->compute.tile[0] = tile_size;
  } else {
    // Rows carry padding: translate one row per task, honoring the strides.
    leaky_relu_op->context.lut_strided = (struct lut_strided_context) {
      .n = channels,
      .x = input,
      .x_stride = input_stride * sizeof(uint8_t),
      .t = leaky_relu_op->lookup_table,
      .y = output,
      .y_stride = output_stride * sizeof(uint8_t),
      .ukernel = xnn_params.x8.lut,
    };
    leaky_relu_op->compute.type = xnn_parallelization_type_1d;
    leaky_relu_op->compute.task_1d = (pthreadpool_task_1d_t) xnn_compute_lut_strided;
    leaky_relu_op->compute.range[0] = batch_size;
    leaky_relu_op->compute.tile[0] = 0;
  }
  leaky_relu_op->state = xnn_run_state_ready;

  return xnn_status_success;
}