1 // Copyright (c) Facebook, Inc. and its affiliates.
2 // All rights reserved.
3 //
4 // Copyright 2019 Google LLC
5 //
6 // This source code is licensed under the BSD-style license found in the
7 // LICENSE file in the root directory of this source tree.
8 
9 #include <assert.h>
10 #include <math.h>
11 #include <stddef.h>
12 #include <stdint.h>
13 #include <stdlib.h>
14 
15 #include <xnnpack.h>
16 #include <xnnpack/allocator.h>
17 #include <xnnpack/operator.h>
18 #include <xnnpack/log.h>
19 
20 
// Fills the 256-entry table that maps every possible quantized input value to
// its quantized Leaky ReLU output.
//
// For each input code the input zero point is subtracted, the difference is
// multiplied by scale_ratio (input scale divided by output scale), negative
// values are additionally multiplied by negative_slope, and the result is
// clamped to [output_min, output_max] expressed relative to
// output_zero_point. The clamped value is rounded with lrintf and offset by
// output_zero_point before being stored.
static void fill_leaky_relu_qu8_table(
    uint8_t* table,
    float scale_ratio,
    float negative_slope,
    uint8_t input_zero_point,
    uint8_t output_zero_point,
    uint8_t output_min,
    uint8_t output_max)
{
  const int32_t input_offset = (int32_t) (uint32_t) input_zero_point;
  const float lower_bound = (float) ((int32_t) output_min - (int32_t) output_zero_point);
  const float upper_bound = (float) ((int32_t) output_max - (int32_t) output_zero_point);

  for (int32_t code = 0; code < 256; code++) {
    const float rescaled = scale_ratio * (float) (code - input_offset);
    float activated = rescaled < 0.0f ? rescaled * negative_slope : rescaled;
    activated = activated < lower_bound ? lower_bound : activated;
    activated = activated > upper_bound ? upper_bound : activated;
    table[(uint32_t) code] = (uint8_t) (lrintf(activated) + (long) output_zero_point);
  }
}

// Creates a Leaky ReLU operator for unsigned 8-bit quantized data.
//
// Parameters:
//   channels          - number of channels; must be non-zero.
//   input_stride      - input element stride; must be >= channels.
//   output_stride     - output element stride; must be >= channels.
//   negative_slope    - multiplier for negative values; must be positive,
//                       normal (finite, non-denormal) and at most 1.0.
//   input_zero_point  - zero point of the quantized input.
//   input_scale       - input scale; must be positive and normal.
//   output_zero_point - zero point of the quantized output.
//   output_scale      - output scale; must be positive and normal.
//   output_min        - lower clamp for output values; must be < output_max.
//   output_max        - upper clamp for output values.
//   flags             - not read by this function.
//   leaky_relu_op_out - receives the created operator on success only.
//
// Returns:
//   xnn_status_success               - operator was created.
//   xnn_status_uninitialized         - XNN_INIT_FLAG_XNNPACK is not set.
//   xnn_status_invalid_parameter     - one of the constraints above is violated.
//   xnn_status_unsupported_parameter - input_scale / output_scale is outside
//                                      [2**-8, 2**8).
//   xnn_status_out_of_memory         - an allocation returned NULL.
//
// On every failure path the (possibly NULL) partially built operator is passed
// to xnn_delete_operator and *leaky_relu_op_out is left untouched.
enum xnn_status xnn_create_leaky_relu_nc_qu8(
    size_t channels,
    size_t input_stride,
    size_t output_stride,
    float negative_slope,
    uint8_t input_zero_point,
    float input_scale,
    uint8_t output_zero_point,
    float output_scale,
    uint8_t output_min,
    uint8_t output_max,
    uint32_t flags,
    xnn_operator_t* leaky_relu_op_out)
{
  xnn_operator_t leaky_relu_op = NULL;

  // The status is staged ahead of each group of checks so that every
  // `goto failure` reports the code matching the group it belongs to.
  enum xnn_status status = xnn_status_uninitialized;
  if ((xnn_params.init_flags & XNN_INIT_FLAG_XNNPACK) == 0) {
    xnn_log_error("failed to create %s operator: XNNPACK is not initialized",
      xnn_operator_type_to_string(xnn_operator_type_leaky_relu_nc_qu8));
    goto failure;
  }

  // Argument validation.
  status = xnn_status_invalid_parameter;
  if (channels == 0) {
    xnn_log_error(
      "failed to create %s operator with %zu channels: number of channels must be non-zero",
      xnn_operator_type_to_string(xnn_operator_type_leaky_relu_nc_qu8),
      channels);
    goto failure;
  }
  if (channels > input_stride) {
    xnn_log_error(
      "failed to create %s operator with input element stride of %zu: "
      "stride must be at least as large as the number of channels (%zu)",
      xnn_operator_type_to_string(xnn_operator_type_leaky_relu_nc_qu8),
      input_stride, channels);
    goto failure;
  }
  if (channels > output_stride) {
    xnn_log_error(
      "failed to create %s operator with output element stride of %zu: "
      "stride must be at least as large as the number of channels (%zu)",
      xnn_operator_type_to_string(xnn_operator_type_leaky_relu_nc_qu8),
      output_stride, channels);
    goto failure;
  }
  // Written as a negated conjunction so that NaN, which fails every
  // comparison, is rejected together with zero, negatives, denormals and
  // infinities.
  if (!(negative_slope > 0.0f && isnormal(negative_slope))) {
    xnn_log_error(
      "failed to create %s operator with %.7g negative slope: slope must be finite, normalized, and positive",
      xnn_operator_type_to_string(xnn_operator_type_leaky_relu_nc_qu8),
      negative_slope);
    goto failure;
  }
  if (1.0f < negative_slope) {
    xnn_log_error(
      "failed to create %s operator with %.7g negative slope: slope must not exceed 1.0",
      xnn_operator_type_to_string(xnn_operator_type_leaky_relu_nc_qu8),
      negative_slope);
    goto failure;
  }
  if (!(input_scale > 0.0f && isnormal(input_scale))) {
    xnn_log_error(
      "failed to create %s operator with %.7g input scale: scale must be finite, normalized, and positive",
      xnn_operator_type_to_string(xnn_operator_type_leaky_relu_nc_qu8),
      input_scale);
    goto failure;
  }
  if (!(output_scale > 0.0f && isnormal(output_scale))) {
    xnn_log_error(
      "failed to create %s operator with %.7g output scale: scale must be finite, normalized, and positive",
      xnn_operator_type_to_string(xnn_operator_type_leaky_relu_nc_qu8),
      output_scale);
    goto failure;
  }
  if (output_max <= output_min) {
    xnn_log_error(
      "failed to create %s operator with [%" PRIu8 ", %" PRIu8 "] output range: range min must be below range max",
      xnn_operator_type_to_string(xnn_operator_type_leaky_relu_nc_qu8),
      output_min, output_max);
    goto failure;
  }

  // Valid arguments that this implementation nevertheless cannot handle.
  status = xnn_status_unsupported_parameter;
  const float scale_ratio = input_scale / output_scale;
  if (scale_ratio < 0x1.0p-8f || scale_ratio >= 0x1.0p+8f) {
    xnn_log_error(
      "failed to create %s operator with %.7g input-to-output scale ratio: "
      "scale ratio must be in [2**-8, 2**8) range",
      xnn_operator_type_to_string(xnn_operator_type_leaky_relu_nc_qu8),
      scale_ratio);
    goto failure;
  }

  // Allocations: the operator descriptor first, then its lookup table.
  status = xnn_status_out_of_memory;
  leaky_relu_op = xnn_allocate_zero_simd_memory(sizeof(struct xnn_operator));
  if (leaky_relu_op == NULL) {
    xnn_log_error(
      "failed to allocate %zu bytes for %s operator descriptor",
      sizeof(struct xnn_operator),
      xnn_operator_type_to_string(xnn_operator_type_leaky_relu_nc_qu8));
    goto failure;
  }
  leaky_relu_op->lookup_table = xnn_allocate_simd_memory(256 * sizeof(uint8_t));
  if (leaky_relu_op->lookup_table == NULL) {
    xnn_log_error(
      "failed to allocate 256 bytes for %s operator lookup table",
      xnn_operator_type_to_string(xnn_operator_type_leaky_relu_nc_qu8));
    goto failure;
  }

  fill_leaky_relu_qu8_table(
    leaky_relu_op->lookup_table,
    scale_ratio,
    negative_slope,
    input_zero_point,
    output_zero_point,
    output_min,
    output_max);

  leaky_relu_op->type = xnn_operator_type_leaky_relu_nc_qu8;
  leaky_relu_op->channels = channels;
  leaky_relu_op->input_pixel_stride = input_stride;
  leaky_relu_op->output_pixel_stride = output_stride;
  // The operator stays in the invalid run state until it is set up.
  leaky_relu_op->state = xnn_run_state_invalid;

  *leaky_relu_op_out = leaky_relu_op;
  return xnn_status_success;

failure:
  // leaky_relu_op is NULL here whenever the failure preceded its allocation.
  xnn_delete_operator(leaky_relu_op);
  return status;
}
163 
// Binds input/output buffers and a batch size to a QU8 Leaky ReLU operator and
// records how the lookup-table computation is to be parallelized.
//
// Parameters:
//   leaky_relu_op - operator to configure; its type must be
//                   xnn_operator_type_leaky_relu_nc_qu8.
//   batch_size    - number of batch elements; zero puts the operator into the
//                   skip state.
//   input         - input buffer, stored in the compute context.
//   output        - output buffer, stored in the compute context.
//   threadpool    - not read by this function.
//
// Returns:
//   xnn_status_invalid_parameter - the operator has a different type.
//   xnn_status_uninitialized     - XNN_INIT_FLAG_XNNPACK is not set.
//   xnn_status_success           - otherwise.
//
// This function only fills in the operator's context and compute description;
// it does not invoke the lookup kernel itself.
enum xnn_status xnn_setup_leaky_relu_nc_qu8(
    xnn_operator_t leaky_relu_op,
    size_t batch_size,
    const uint8_t* input,
    uint8_t* output,
    pthreadpool_t threadpool)
{
  if (leaky_relu_op->type != xnn_operator_type_leaky_relu_nc_qu8) {
    xnn_log_error(
      "failed to setup operator: operator type mismatch (expected %s, got %s)",
      xnn_operator_type_to_string(xnn_operator_type_leaky_relu_nc_qu8),
      xnn_operator_type_to_string(leaky_relu_op->type));
    return xnn_status_invalid_parameter;
  }

  // Invalidate any earlier setup; the state is raised to ready (or skip) only
  // once the new configuration is complete.
  leaky_relu_op->state = xnn_run_state_invalid;

  if ((xnn_params.init_flags & XNN_INIT_FLAG_XNNPACK) == 0) {
    xnn_log_error("failed to setup %s operator: XNNPACK is not initialized",
      xnn_operator_type_to_string(xnn_operator_type_leaky_relu_nc_qu8));
    return xnn_status_uninitialized;
  }

  // An empty batch has nothing to compute.
  if (batch_size == 0) {
    leaky_relu_op->state = xnn_run_state_skip;
    return xnn_status_success;
  }

  const size_t num_channels = leaky_relu_op->channels;
  const size_t in_stride = leaky_relu_op->input_pixel_stride;
  const size_t out_stride = leaky_relu_op->output_pixel_stride;

  // Rows have no padding when both strides equal the channel count.
  const int rows_are_dense = (in_stride == num_channels) && (out_stride == num_channels);

  if (!rows_are_dense && batch_size != 1) {
    // Padded rows and more than one batch element: one task per batch element,
    // each covering num_channels bytes.
    leaky_relu_op->context.lut_strided = (struct lut_strided_context) {
      .n = num_channels,
      .x = input,
      .x_stride = in_stride * sizeof(uint8_t),
      .t = leaky_relu_op->lookup_table,
      .y = output,
      .y_stride = out_stride * sizeof(uint8_t),
      .ukernel = xnn_params.x8.lut,
    };
    leaky_relu_op->compute.type = xnn_parallelization_type_1d;
    leaky_relu_op->compute.task_1d = (pthreadpool_task_1d_t) xnn_compute_lut_strided;
    leaky_relu_op->compute.range[0] = batch_size;
    leaky_relu_op->compute.tile[0] = 0;
  } else {
    // Dense rows, or a single batch element: the data is handled as one flat
    // byte range split into fixed-size tiles.
    const size_t tile_bytes = 1024;
    leaky_relu_op->context.lut_contiguous = (struct lut_contiguous_context) {
      .x = input,
      .x_stride = in_stride * sizeof(uint8_t),
      .t = leaky_relu_op->lookup_table,
      .y = output,
      .y_stride = out_stride * sizeof(uint8_t),
      .ukernel = xnn_params.x8.lut,
    };
    leaky_relu_op->compute.type = xnn_parallelization_type_1d_tile_1d;
    leaky_relu_op->compute.task_1d_tile_1d = (pthreadpool_task_1d_tile_1d_t) xnn_compute_lut_contiguous;
    leaky_relu_op->compute.range[0] = batch_size * num_channels * sizeof(uint8_t);
    leaky_relu_op->compute.tile[0] = tile_bytes;
  }

  leaky_relu_op->state = xnn_run_state_ready;
  return xnn_status_success;
}
227