1 // Copyright (c) Facebook, Inc. and its affiliates.
2 // All rights reserved.
3 //
4 // Copyright 2019 Google LLC
5 //
6 // This source code is licensed under the BSD-style license found in the
7 // LICENSE file in the root directory of this source tree.
8 
9 #include <assert.h>
10 #include <math.h>
11 #include <stddef.h>
12 #include <stdint.h>
13 #include <stdlib.h>
14 
15 #include <xnnpack.h>
16 #include <xnnpack/allocator.h>
17 #include <xnnpack/operator.h>
18 #include <xnnpack/log.h>
19 
20 
// Fills a 256-entry table that maps every possible quantized input byte to its
// quantized sigmoid value, clamped to the [output_min, output_max] range.
// The output quantization is fixed at scale 1/256 (hence the factor of 256).
static void fill_sigmoid_qu8_lookup_table(
    uint8_t* table,
    uint8_t input_zero_point,
    float input_scale,
    uint8_t output_min,
    uint8_t output_max)
{
  const float lower_bound = (float) (int32_t) output_min;
  const float upper_bound = (float) (int32_t) output_max;
  const int32_t zero_point = (int32_t) (uint32_t) input_zero_point;

  for (int32_t code = 0; code < 256; code++) {
    // Dequantize the input byte.
    const float x = input_scale * (float) (code - zero_point);
    // Scale sigmoid(x) by 1 / output scale = 256.0
    float scaled_y = 256.0f / (1.0f + expf(-x));
    scaled_y = scaled_y < lower_bound ? lower_bound : scaled_y;
    scaled_y = scaled_y > upper_bound ? upper_bound : scaled_y;
    table[(uint32_t) code] = (uint8_t) lrintf(scaled_y);
  }
}

// Creates a sigmoid operator for quantized 8-bit unsigned (QU8) data.
//
// The operator is implemented through a 256-entry lookup table which is
// computed here from the input quantization parameters and the output range.
//
// Returns:
//   - xnn_status_uninitialized if XNNPACK has not been initialized.
//   - xnn_status_invalid_parameter if channels is zero, a stride is smaller
//     than channels, a scale is not a positive normal number, or
//     output_min >= output_max.
//   - xnn_status_unsupported_parameter if output_scale is not 1/256 or
//     output_zero_point is not 0.
//   - xnn_status_out_of_memory if the descriptor or the table can not be
//     allocated.
//   - xnn_status_success otherwise; the new operator is stored in
//     *sigmoid_op_out.
//
// The flags argument is not read by this function.
enum xnn_status xnn_create_sigmoid_nc_qu8(
    size_t channels,
    size_t input_stride,
    size_t output_stride,
    uint8_t input_zero_point,
    float input_scale,
    uint8_t output_zero_point,
    float output_scale,
    uint8_t output_min,
    uint8_t output_max,
    uint32_t flags,
    xnn_operator_t* sigmoid_op_out)
{
  // Remains NULL until the descriptor is allocated; the shared error path
  // passes it to xnn_delete_operator either way.
  xnn_operator_t op = NULL;
  // Holds the status to report if the next group of checks fails.
  enum xnn_status result = xnn_status_uninitialized;

  if (!(xnn_params.init_flags & XNN_INIT_FLAG_XNNPACK)) {
    xnn_log_error("failed to create %s operator: XNNPACK is not initialized",
      xnn_operator_type_to_string(xnn_operator_type_sigmoid_nc_qu8));
    goto error;
  }

  // Stage 1: arguments that are invalid for any implementation.
  result = xnn_status_invalid_parameter;

  if (channels == 0) {
    xnn_log_error(
      "failed to create %s operator with %zu channels: number of channels must be non-zero",
      xnn_operator_type_to_string(xnn_operator_type_sigmoid_nc_qu8), channels);
    goto error;
  }

  if (channels > input_stride) {
    xnn_log_error(
      "failed to create %s operator with input element stride of %zu: "
      "stride must be at least as large as the number of channels (%zu)",
      xnn_operator_type_to_string(xnn_operator_type_sigmoid_nc_qu8), input_stride, channels);
    goto error;
  }

  if (channels > output_stride) {
    xnn_log_error(
      "failed to create %s operator with output element stride of %zu: "
      "stride must be at least as large as the number of channels (%zu)",
      xnn_operator_type_to_string(xnn_operator_type_sigmoid_nc_qu8), output_stride, channels);
    goto error;
  }

  if (!(input_scale > 0.0f && isnormal(input_scale))) {
    xnn_log_error(
      "failed to create %s operator with %.7g input scale: scale must be finite, normalized, and positive",
      xnn_operator_type_to_string(xnn_operator_type_sigmoid_nc_qu8), input_scale);
    goto error;
  }

  if (!(output_scale > 0.0f && isnormal(output_scale))) {
    xnn_log_error(
      "failed to create %s operator with %.7g output scale: scale must be finite, normalized, and positive",
      xnn_operator_type_to_string(xnn_operator_type_sigmoid_nc_qu8), output_scale);
    goto error;
  }

  if (!(output_min < output_max)) {
    xnn_log_error(
      "failed to create %s operator with [%" PRIu8 ", %" PRIu8 "] output range: range min must be below range max",
      xnn_operator_type_to_string(xnn_operator_type_sigmoid_nc_qu8), output_min, output_max);
    goto error;
  }

  // Stage 2: arguments that are valid but not handled by this implementation.
  result = xnn_status_unsupported_parameter;

  const float supported_output_scale = 0x1.0p-8f;
  if (output_scale != supported_output_scale) {
    xnn_log_error(
      "failed to create %s operator with %.7g output scale: only output scale of 1/256 is supported",
      xnn_operator_type_to_string(xnn_operator_type_sigmoid_nc_qu8), output_scale);
    goto error;
  }

  if (output_zero_point != 0) {
    xnn_log_error(
      "failed to create %s operator with %" PRIu8 " output zero point: only output zero point of 0 is supported",
      xnn_operator_type_to_string(xnn_operator_type_sigmoid_nc_qu8), output_zero_point);
    goto error;
  }

  // Stage 3: allocations.
  result = xnn_status_out_of_memory;

  op = xnn_allocate_zero_simd_memory(sizeof(struct xnn_operator));
  if (op == NULL) {
    xnn_log_error(
      "failed to allocate %zu bytes for %s operator descriptor",
      sizeof(struct xnn_operator), xnn_operator_type_to_string(xnn_operator_type_sigmoid_nc_qu8));
    goto error;
  }

  op->lookup_table = xnn_allocate_simd_memory(256 * sizeof(uint8_t));
  if (op->lookup_table == NULL) {
    xnn_log_error(
      "failed to allocate 256 bytes for %s operator lookup table",
      xnn_operator_type_to_string(xnn_operator_type_sigmoid_nc_qu8));
    goto error;
  }

  uint8_t* table = op->lookup_table;
  fill_sigmoid_qu8_lookup_table(table, input_zero_point, input_scale, output_min, output_max);

  op->type = xnn_operator_type_sigmoid_nc_qu8;
  op->channels = channels;
  op->input_pixel_stride = input_stride;
  op->output_pixel_stride = output_stride;

  // The operator can not run until it has been set up.
  op->state = xnn_run_state_invalid;

  *sigmoid_op_out = op;
  return xnn_status_success;

error:
  xnn_delete_operator(op);
  return result;
}
154 
// Binds input/output buffers and a batch size to a QU8 sigmoid operator and
// selects the lookup-table compute routine to run.
//
// Returns:
//   - xnn_status_invalid_parameter if sigmoid_op is not a QU8 sigmoid operator.
//   - xnn_status_uninitialized if XNNPACK has not been initialized.
//   - xnn_status_success otherwise. With batch_size == 0 the operator is
//     marked xnn_run_state_skip and nothing else is configured.
//
// The threadpool argument is not read by this function.
enum xnn_status xnn_setup_sigmoid_nc_qu8(
    xnn_operator_t sigmoid_op,
    size_t batch_size,
    const uint8_t* input,
    uint8_t* output,
    pthreadpool_t threadpool)
{
  if (sigmoid_op->type != xnn_operator_type_sigmoid_nc_qu8) {
    xnn_log_error("failed to setup operator: operator type mismatch (expected %s, got %s)",
      xnn_operator_type_to_string(xnn_operator_type_sigmoid_nc_qu8),
      xnn_operator_type_to_string(sigmoid_op->type));
    return xnn_status_invalid_parameter;
  }

  // Invalidate any previous setup; the state is only promoted once this call
  // has fully succeeded.
  sigmoid_op->state = xnn_run_state_invalid;

  if (!(xnn_params.init_flags & XNN_INIT_FLAG_XNNPACK)) {
    xnn_log_error("failed to setup %s operator: XNNPACK is not initialized",
      xnn_operator_type_to_string(xnn_operator_type_sigmoid_nc_qu8));
    return xnn_status_uninitialized;
  }

  if (batch_size == 0) {
    // Nothing to compute.
    sigmoid_op->state = xnn_run_state_skip;
    return xnn_status_success;
  }

  sigmoid_op->input = input;
  sigmoid_op->output = output;
  sigmoid_op->batch_size = batch_size;

  const size_t num_channels = sigmoid_op->channels;
  const size_t in_stride = sigmoid_op->input_pixel_stride;
  const size_t out_stride = sigmoid_op->output_pixel_stride;
  const size_t in_stride_bytes = in_stride * sizeof(uint8_t);
  const size_t out_stride_bytes = out_stride * sizeof(uint8_t);

  if ((in_stride != num_channels || out_stride != num_channels) && batch_size != 1) {
    // Several batch elements with a stride different from the channel count:
    // one task per batch element.
    sigmoid_op->context.lut_strided = (struct lut_strided_context) {
      .n = num_channels,
      .x = input,
      .x_stride = in_stride_bytes,
      .t = sigmoid_op->lookup_table,
      .y = output,
      .y_stride = out_stride_bytes,
      .ukernel = xnn_params.x8.lut,
    };
    sigmoid_op->compute.type = xnn_parallelization_type_1d;
    sigmoid_op->compute.task_1d = (pthreadpool_task_1d_t) xnn_compute_lut_strided;
    sigmoid_op->compute.range[0] = batch_size;
    sigmoid_op->compute.tile[0] = 0;
  } else {
    // Both strides equal the channel count, or there is a single batch
    // element: treat the data as one flat byte range split into tiles.
    const size_t tile_bytes = 1024;
    sigmoid_op->context.lut_contiguous = (struct lut_contiguous_context) {
      .x = input,
      .x_stride = in_stride_bytes,
      .t = sigmoid_op->lookup_table,
      .y = output,
      .y_stride = out_stride_bytes,
      .ukernel = xnn_params.x8.lut,
    };
    sigmoid_op->compute.type = xnn_parallelization_type_1d_tile_1d;
    sigmoid_op->compute.task_1d_tile_1d = (pthreadpool_task_1d_tile_1d_t) xnn_compute_lut_contiguous;
    sigmoid_op->compute.range[0] = batch_size * num_channels * sizeof(uint8_t);
    sigmoid_op->compute.tile[0] = tile_bytes;
  }

  sigmoid_op->state = xnn_run_state_ready;
  return xnn_status_success;
}
221