1 // Copyright (c) Facebook, Inc. and its affiliates.
2 // All rights reserved.
3 //
4 // Copyright 2019 Google LLC
5 //
6 // This source code is licensed under the BSD-style license found in the
7 // LICENSE file in the root directory of this source tree.
8
#include <assert.h>
#include <inttypes.h>
#include <math.h>
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>

#include <xnnpack.h>
#include <xnnpack/allocator.h>
#include <xnnpack/operator.h>
#include <xnnpack/log.h>
19
20
// Fills a 256-entry table mapping every possible quantized input byte to the
// quantized sigmoid of its real value.
//
// For each byte value i, the real input is input_scale * (i - input_zero_point).
// The sigmoid of that value is multiplied by 256 (the reciprocal of the only
// output scale the caller accepts, 1/256), clamped to [output_min, output_max],
// and rounded with lrintf() before being stored as a byte.
//
// lookup_table must point to at least 256 writable bytes.
static void compute_sigmoid_qu8_lookup_table(
    uint8_t* lookup_table,
    uint8_t input_zero_point,
    float input_scale,
    uint8_t output_min,
    uint8_t output_max)
{
  const float scaled_min = (float) (int32_t) output_min;
  const float scaled_max = (float) (int32_t) output_max;
  for (int32_t i = 0; i < 256; i++) {
    const float x = input_scale * (float) (i - (int32_t) (uint32_t) input_zero_point);
    // Scale sigmoid(x) by 1 / output scale = 256.0
    float scaled_sigmoid_x = 256.0f / (1.0f + expf(-x));
    if (scaled_sigmoid_x < scaled_min) {
      scaled_sigmoid_x = scaled_min;
    }
    // Clamping to scaled_max (at most 255) before rounding keeps the result
    // representable as uint8_t even when the scaled sigmoid approaches 256.
    if (scaled_sigmoid_x > scaled_max) {
      scaled_sigmoid_x = scaled_max;
    }
    lookup_table[(uint32_t) i] = (uint8_t) lrintf(scaled_sigmoid_x);
  }
}

// Creates a Sigmoid operator for quantized 8-bit unsigned (QU8) data.
//
// Parameters:
//   channels          - number of channels; must be non-zero.
//   input_stride      - input element stride; must be >= channels.
//   output_stride     - output element stride; must be >= channels.
//   input_zero_point  - zero point of the quantized input.
//   input_scale       - scale of the quantized input; must be positive and normal.
//   output_zero_point - zero point of the quantized output; only 0 is supported.
//   output_scale      - scale of the quantized output; must be positive and
//                       normal, and only 1/256 is supported.
//   output_min        - lower clamp for output values; must be < output_max.
//   output_max        - upper clamp for output values.
//   flags             - not read by this function.
//   sigmoid_op_out    - receives the created operator on success; it is not
//                       written on failure.
//
// Returns:
//   xnn_status_success               - the operator was created.
//   xnn_status_uninitialized         - XNNPACK is not initialized.
//   xnn_status_invalid_parameter     - a parameter failed validation.
//   xnn_status_unsupported_parameter - output scale or zero point is valid but
//                                      not supported.
//   xnn_status_out_of_memory         - an allocation failed.
enum xnn_status xnn_create_sigmoid_nc_qu8(
    size_t channels,
    size_t input_stride,
    size_t output_stride,
    uint8_t input_zero_point,
    float input_scale,
    uint8_t output_zero_point,
    float output_scale,
    uint8_t output_min,
    uint8_t output_max,
    uint32_t flags,
    xnn_operator_t* sigmoid_op_out)
{
  xnn_operator_t sigmoid_op = NULL;
  // `status` always holds the code to return if the next check fails; it is
  // updated before each group of checks.
  enum xnn_status status = xnn_status_uninitialized;

  if ((xnn_params.init_flags & XNN_INIT_FLAG_XNNPACK) == 0) {
    xnn_log_error("failed to create %s operator: XNNPACK is not initialized",
      xnn_operator_type_to_string(xnn_operator_type_sigmoid_nc_qu8));
    goto error;
  }

  status = xnn_status_invalid_parameter;

  if (channels == 0) {
    xnn_log_error(
      "failed to create %s operator with %zu channels: number of channels must be non-zero",
      xnn_operator_type_to_string(xnn_operator_type_sigmoid_nc_qu8), channels);
    goto error;
  }

  if (input_stride < channels) {
    xnn_log_error(
      "failed to create %s operator with input element stride of %zu: "
      "stride must be at least as large as the number of channels (%zu)",
      xnn_operator_type_to_string(xnn_operator_type_sigmoid_nc_qu8), input_stride, channels);
    goto error;
  }

  if (output_stride < channels) {
    xnn_log_error(
      "failed to create %s operator with output element stride of %zu: "
      "stride must be at least as large as the number of channels (%zu)",
      xnn_operator_type_to_string(xnn_operator_type_sigmoid_nc_qu8), output_stride, channels);
    goto error;
  }

  // isnormal() rejects zero, subnormal, infinite and NaN values.
  if (input_scale <= 0.0f || !isnormal(input_scale)) {
    xnn_log_error(
      "failed to create %s operator with %.7g input scale: scale must be finite, normalized, and positive",
      xnn_operator_type_to_string(xnn_operator_type_sigmoid_nc_qu8), input_scale);
    goto error;
  }

  if (output_scale <= 0.0f || !isnormal(output_scale)) {
    xnn_log_error(
      "failed to create %s operator with %.7g output scale: scale must be finite, normalized, and positive",
      xnn_operator_type_to_string(xnn_operator_type_sigmoid_nc_qu8), output_scale);
    goto error;
  }

  if (output_min >= output_max) {
    xnn_log_error(
      "failed to create %s operator with [%" PRIu8 ", %" PRIu8 "] output range: range min must be below range max",
      xnn_operator_type_to_string(xnn_operator_type_sigmoid_nc_qu8), output_min, output_max);
    goto error;
  }

  status = xnn_status_unsupported_parameter;

  // The lookup-table computation hard-codes a 256x output scaling and no
  // zero-point offset, so other output quantization parameters are rejected.
  if (output_scale != 0x1.0p-8f) {
    xnn_log_error(
      "failed to create %s operator with %.7g output scale: only output scale of 1/256 is supported",
      xnn_operator_type_to_string(xnn_operator_type_sigmoid_nc_qu8), output_scale);
    goto error;
  }

  if (output_zero_point != 0) {
    xnn_log_error(
      "failed to create %s operator with %" PRIu8 " output zero point: only output zero point of 0 is supported",
      xnn_operator_type_to_string(xnn_operator_type_sigmoid_nc_qu8), output_zero_point);
    goto error;
  }

  status = xnn_status_out_of_memory;

  sigmoid_op = xnn_allocate_zero_simd_memory(sizeof(struct xnn_operator));
  if (sigmoid_op == NULL) {
    xnn_log_error(
      "failed to allocate %zu bytes for %s operator descriptor",
      sizeof(struct xnn_operator), xnn_operator_type_to_string(xnn_operator_type_sigmoid_nc_qu8));
    goto error;
  }

  sigmoid_op->lookup_table = xnn_allocate_simd_memory(256 * sizeof(uint8_t));
  if (sigmoid_op->lookup_table == NULL) {
    xnn_log_error(
      "failed to allocate 256 bytes for %s operator lookup table",
      xnn_operator_type_to_string(xnn_operator_type_sigmoid_nc_qu8));
    goto error;
  }

  compute_sigmoid_qu8_lookup_table(
    sigmoid_op->lookup_table, input_zero_point, input_scale, output_min, output_max);

  sigmoid_op->channels = channels;
  sigmoid_op->input_pixel_stride = input_stride;
  sigmoid_op->output_pixel_stride = output_stride;

  sigmoid_op->type = xnn_operator_type_sigmoid_nc_qu8;

  // The operator cannot be run until it has been set up.
  sigmoid_op->state = xnn_run_state_invalid;

  *sigmoid_op_out = sigmoid_op;
  return xnn_status_success;

error:
  // sigmoid_op may still be NULL here (failure before allocation);
  // xnn_delete_operator is defined elsewhere and is expected to cope with
  // that and to release a partially constructed operator.
  xnn_delete_operator(sigmoid_op);
  return status;
}
154
// Binds a batch size and input/output buffers to a Sigmoid (NC, QU8) operator
// and fills in its compute descriptor.
//
// Parameters:
//   sigmoid_op - operator whose type must be xnn_operator_type_sigmoid_nc_qu8.
//   batch_size - number of batch elements; zero marks the operator as
//                "skip" and returns success without touching the buffers.
//   input      - input buffer pointer stored in the operator.
//   output     - output buffer pointer stored in the operator.
//   threadpool - not read by this function.
//
// Returns:
//   xnn_status_success           - the operator is ready (or marked as skip).
//   xnn_status_invalid_parameter - the operator has a different type.
//   xnn_status_uninitialized     - XNNPACK is not initialized.
enum xnn_status xnn_setup_sigmoid_nc_qu8(
    xnn_operator_t sigmoid_op,
    size_t batch_size,
    const uint8_t* input,
    uint8_t* output,
    pthreadpool_t threadpool)
{
  if (sigmoid_op->type != xnn_operator_type_sigmoid_nc_qu8) {
    xnn_log_error("failed to setup operator: operator type mismatch (expected %s, got %s)",
      xnn_operator_type_to_string(xnn_operator_type_sigmoid_nc_qu8),
      xnn_operator_type_to_string(sigmoid_op->type));
    return xnn_status_invalid_parameter;
  }

  // Until setup completes, the operator must not be runnable.
  sigmoid_op->state = xnn_run_state_invalid;

  if ((xnn_params.init_flags & XNN_INIT_FLAG_XNNPACK) == 0) {
    xnn_log_error("failed to setup %s operator: XNNPACK is not initialized",
      xnn_operator_type_to_string(xnn_operator_type_sigmoid_nc_qu8));
    return xnn_status_uninitialized;
  }

  // An empty batch needs no work.
  if (batch_size == 0) {
    sigmoid_op->state = xnn_run_state_skip;
    return xnn_status_success;
  }

  sigmoid_op->batch_size = batch_size;
  sigmoid_op->input = input;
  sigmoid_op->output = output;

  const size_t num_channels = sigmoid_op->channels;
  const size_t in_stride = sigmoid_op->input_pixel_stride;
  const size_t out_stride = sigmoid_op->output_pixel_stride;

  if (batch_size != 1 && (in_stride != num_channels || out_stride != num_channels)) {
    // Several batch elements with gaps between them: one task per batch
    // element, each covering num_channels bytes.
    sigmoid_op->context.lut_strided = (struct lut_strided_context) {
      .n = num_channels,
      .x = input,
      .x_stride = in_stride * sizeof(uint8_t),
      .t = sigmoid_op->lookup_table,
      .y = output,
      .y_stride = out_stride * sizeof(uint8_t),
      .ukernel = xnn_params.x8.lut,
    };
    sigmoid_op->compute.type = xnn_parallelization_type_1d;
    sigmoid_op->compute.task_1d = (pthreadpool_task_1d_t) xnn_compute_lut_strided;
    sigmoid_op->compute.range[0] = batch_size;
    sigmoid_op->compute.tile[0] = 0;
  } else {
    // Either both strides equal the channel count or there is a single batch
    // element: the data is treated as one flat run of bytes split into tiles.
    const size_t tile_size = 1024;
    sigmoid_op->context.lut_contiguous = (struct lut_contiguous_context) {
      .x = input,
      .x_stride = in_stride * sizeof(uint8_t),
      .t = sigmoid_op->lookup_table,
      .y = output,
      .y_stride = out_stride * sizeof(uint8_t),
      .ukernel = xnn_params.x8.lut,
    };
    sigmoid_op->compute.type = xnn_parallelization_type_1d_tile_1d;
    sigmoid_op->compute.task_1d_tile_1d = (pthreadpool_task_1d_tile_1d_t) xnn_compute_lut_contiguous;
    sigmoid_op->compute.range[0] = batch_size * num_channels * sizeof(uint8_t);
    sigmoid_op->compute.tile[0] = tile_size;
  }

  sigmoid_op->state = xnn_run_state_ready;
  return xnn_status_success;
}
221