1 // Copyright (c) Facebook, Inc. and its affiliates.
2 // All rights reserved.
3 //
4 // Copyright 2019 Google LLC
5 //
6 // This source code is licensed under the BSD-style license found in the
7 // LICENSE file in the root directory of this source tree.
8 
9 #include <assert.h>
10 #include <math.h>
11 #include <stddef.h>
12 #include <stdint.h>
13 #include <stdlib.h>
14 
15 #include <xnnpack.h>
16 #include <xnnpack/allocator.h>
17 #include <xnnpack/operator.h>
18 #include <xnnpack/log.h>
19 #include <xnnpack/params-init.h>
20 
21 
// Creates a softmax operator for unsigned 8-bit quantized data (the `nc_qu8`
// variant) and precomputes its 256-entry exponent lookup table.
//
// Parameters:
//   channels          - number of channels; must be non-zero.
//   input_stride      - input element stride; must be >= channels.
//   output_stride     - output element stride; must be >= channels.
//   input_scale       - input quantization scale; must be positive and normal.
//   output_zero_point - output quantization zero point; only 0 is supported.
//   output_scale      - output quantization scale; only 1/256 is supported.
//   flags             - not read by this function.
//   softmax_op_out    - receives the new operator on success; left untouched
//                       on failure.
//
// Returns:
//   xnn_status_success               - operator created.
//   xnn_status_uninitialized         - XNNPACK has not been initialized.
//   xnn_status_invalid_parameter     - zero channels, a stride smaller than
//                                      channels, or a scale that is not
//                                      positive and normal.
//   xnn_status_unsupported_parameter - output scale != 1/256 or output zero
//                                      point != 0.
//   xnn_status_out_of_memory         - descriptor or lookup table allocation
//                                      failed.
enum xnn_status xnn_create_softmax_nc_qu8(
    size_t channels,
    size_t input_stride,
    size_t output_stride,
    float input_scale,
    uint8_t output_zero_point,
    float output_scale,
    uint32_t flags,
    xnn_operator_t* softmax_op_out)
{
  xnn_operator_t softmax_op = NULL;
  // `status` always holds the code to return if the next check fails.
  enum xnn_status status = xnn_status_uninitialized;

  if ((xnn_params.init_flags & XNN_INIT_FLAG_XNNPACK) == 0) {
    xnn_log_error("failed to create %s operator: XNNPACK is not initialized",
      xnn_operator_type_to_string(xnn_operator_type_softmax_nc_qu8));
    goto error;
  }

  status = xnn_status_invalid_parameter;

  if (channels == 0) {
    xnn_log_error(
      "failed to create %s operator with %zu channels: number of channels must be non-zero",
      xnn_operator_type_to_string(xnn_operator_type_softmax_nc_qu8), channels);
    goto error;
  }

  if (input_stride < channels) {
    xnn_log_error(
      "failed to create %s operator with input element stride of %zu: "
      "stride must be at least as large as the number of channels (%zu)",
      xnn_operator_type_to_string(xnn_operator_type_softmax_nc_qu8), input_stride, channels);
    goto error;
  }

  if (output_stride < channels) {
    xnn_log_error(
      "failed to create %s operator with output element stride of %zu: "
      "stride must be at least as large as the number of channels (%zu)",
      xnn_operator_type_to_string(xnn_operator_type_softmax_nc_qu8), output_stride, channels);
    goto error;
  }

  // isnormal() rejects zero, subnormals, infinities, and NaN; the explicit
  // comparison additionally rejects negative values.
  if (input_scale <= 0.0f || !isnormal(input_scale)) {
    xnn_log_error(
      "failed to create %s operator with %.7g input scale: scale must be finite, normalized, and positive",
      xnn_operator_type_to_string(xnn_operator_type_softmax_nc_qu8), input_scale);
    goto error;
  }

  if (output_scale <= 0.0f || !isnormal(output_scale)) {
    xnn_log_error(
      "failed to create %s operator with %.7g output scale: scale must be finite, normalized, and positive",
      xnn_operator_type_to_string(xnn_operator_type_softmax_nc_qu8), output_scale);
    goto error;
  }

  status = xnn_status_unsupported_parameter;

  if (output_scale != 0x1.0p-8f) {
    xnn_log_error(
      "failed to create %s operator with %.7g output scale: only output scale of 1/256 is supported",
      xnn_operator_type_to_string(xnn_operator_type_softmax_nc_qu8), output_scale);
    goto error;
  }

  if (output_zero_point != 0) {
    xnn_log_error(
      "failed to create %s operator with %" PRIu8 " output zero point: only output zero point of 0 is supported",
      xnn_operator_type_to_string(xnn_operator_type_softmax_nc_qu8), output_zero_point);
    goto error;
  }

  status = xnn_status_out_of_memory;

  softmax_op = xnn_allocate_zero_simd_memory(sizeof(struct xnn_operator));
  if (softmax_op == NULL) {
    xnn_log_error(
      "failed to allocate %zu bytes for %s operator descriptor",
      sizeof(struct xnn_operator), xnn_operator_type_to_string(xnn_operator_type_softmax_nc_qu8));
    goto error;
  }

  // One 32-bit table entry per possible 8-bit input value.
  softmax_op->lookup_table = xnn_allocate_simd_memory(256 * sizeof(uint32_t));
  if (softmax_op->lookup_table == NULL) {
    xnn_log_error(
      "failed to allocate %zu bytes for %s operator lookup table",
      256 * sizeof(uint32_t), xnn_operator_type_to_string(xnn_operator_type_softmax_nc_qu8));
    goto error;
  }

  uint32_t* lookup_table = softmax_op->lookup_table;
  // Entry i holds qscale * exp((i - 255) * input_scale). The exponent is never
  // positive, so every entry is at most qscale. qscale is capped both at
  // UINT32_MAX / channels and at 2^23 - 1.
  // NOTE(review): the first cap presumably keeps a sum of `channels` entries
  // within 32 bits -- confirm against the lut32norm micro-kernel.
  const double qscale = fmin(((double) UINT32_MAX) / (double) channels, 8388607.0);
  for (int32_t i = 0; i < 256; i++) {
    const double scaled_exp_xi = qscale * exp((double) (i - 255) * (double) input_scale);
    lookup_table[(uint32_t) i] = (uint32_t) lrint(scaled_exp_xi);
  }

  softmax_op->channels = channels;
  softmax_op->input_pixel_stride = input_stride;
  softmax_op->output_pixel_stride = output_stride;

  softmax_op->type = xnn_operator_type_softmax_nc_qu8;

  // The operator cannot be run until a setup call marks it ready.
  softmax_op->state = xnn_run_state_invalid;

  *softmax_op_out = softmax_op;
  return xnn_status_success;

error:
  // softmax_op may still be NULL here, or may have a NULL lookup table.
  // NOTE(review): relies on xnn_delete_operator (defined elsewhere) handling
  // both cases -- confirm.
  xnn_delete_operator(softmax_op);
  return status;
}
136 
// Binds input/output buffers and a batch size to a QU8 softmax operator and
// prepares its 1D parallel compute descriptor.
//
// Parameters:
//   softmax_op - operator whose type must be xnn_operator_type_softmax_nc_qu8;
//                dereferenced without a NULL check.
//   batch_size - number of batch elements; zero marks the operator as "skip".
//   input      - input buffer pointer, stored in the operator.
//   output     - output buffer pointer, stored in the operator.
//   threadpool - not read by this function.
//
// Returns:
//   xnn_status_success           - operator is ready to run, or was marked as
//                                  skip for an empty batch.
//   xnn_status_invalid_parameter - operator type mismatch.
//   xnn_status_uninitialized     - XNNPACK has not been initialized.
enum xnn_status xnn_setup_softmax_nc_qu8(
    xnn_operator_t softmax_op,
    size_t batch_size,
    const uint8_t* input,
    uint8_t* output,
    pthreadpool_t threadpool)
{
  if (softmax_op->type != xnn_operator_type_softmax_nc_qu8) {
    xnn_log_error("failed to setup operator: operator type mismatch (expected %s, got %s)",
      xnn_operator_type_to_string(xnn_operator_type_softmax_nc_qu8),
      xnn_operator_type_to_string(softmax_op->type));
    return xnn_status_invalid_parameter;
  }
  // Invalidate first so that a failed setup never leaves a stale "ready" state.
  softmax_op->state = xnn_run_state_invalid;

  if ((xnn_params.init_flags & XNN_INIT_FLAG_XNNPACK) == 0) {
    xnn_log_error("failed to setup %s operator: XNNPACK is not initialized",
      xnn_operator_type_to_string(xnn_operator_type_softmax_nc_qu8));
    return xnn_status_uninitialized;
  }

  if (batch_size == 0) {
    // Nothing to compute: the buffers and compute descriptor are not updated.
    softmax_op->state = xnn_run_state_skip;
    return xnn_status_success;
  }

  softmax_op->batch_size = batch_size;
  softmax_op->input = input;
  softmax_op->output = output;

  // Strides are stored on the operator in elements and converted to bytes here.
  softmax_op->context.u8_softmax = (struct u8_softmax_context) {
    .n = softmax_op->channels,
    .x = input,
    .x_stride = softmax_op->input_pixel_stride * sizeof(uint8_t),
    .t = softmax_op->lookup_table,
    .y = output,
    .y_stride = softmax_op->output_pixel_stride * sizeof(uint8_t),
    .rmax_ukernel = xnn_params.u8.rmax,
    .lut_norm_ukernel = xnn_params.u8.lut32norm,
  };
  // One task per batch element.
  softmax_op->compute.type = xnn_parallelization_type_1d;
  softmax_op->compute.task_1d = (pthreadpool_task_1d_t) xnn_compute_u8_softmax;
  softmax_op->compute.range[0] = batch_size;
  softmax_op->state = xnn_run_state_ready;

  return xnn_status_success;
}
184 
// Creates a softmax operator for single-precision floating-point data (the
// `nc_f32` variant).
//
// Parameters:
//   channels       - number of channels; must be non-zero.
//   input_stride   - input element stride; must be >= channels.
//   output_stride  - output element stride; must be >= channels.
//   flags          - not read by this function.
//   softmax_op_out - receives the new operator on success; left untouched on
//                    failure.
//
// Returns:
//   xnn_status_success           - operator created.
//   xnn_status_uninitialized     - XNNPACK has not been initialized.
//   xnn_status_invalid_parameter - zero channels or a stride smaller than
//                                  channels.
//   xnn_status_out_of_memory     - descriptor allocation failed.
enum xnn_status xnn_create_softmax_nc_f32(
    size_t channels,
    size_t input_stride,
    size_t output_stride,
    uint32_t flags,
    xnn_operator_t* softmax_op_out)
{
  xnn_operator_t softmax_op = NULL;
  // `status` always holds the code to return if the next check fails.
  enum xnn_status status = xnn_status_uninitialized;

  if ((xnn_params.init_flags & XNN_INIT_FLAG_XNNPACK) == 0) {
    xnn_log_error("failed to create %s operator: XNNPACK is not initialized",
      xnn_operator_type_to_string(xnn_operator_type_softmax_nc_f32));
    goto error;
  }

  status = xnn_status_invalid_parameter;

  if (channels == 0) {
    xnn_log_error(
      "failed to create %s operator with %zu channels: number of channels must be non-zero",
      xnn_operator_type_to_string(xnn_operator_type_softmax_nc_f32), channels);
    goto error;
  }

  if (input_stride < channels) {
    xnn_log_error(
      "failed to create %s operator with input element stride of %zu: "
      "stride must be at least as large as the number of channels (%zu)",
      xnn_operator_type_to_string(xnn_operator_type_softmax_nc_f32), input_stride, channels);
    goto error;
  }

  if (output_stride < channels) {
    xnn_log_error(
      "failed to create %s operator with output element stride of %zu: "
      "stride must be at least as large as the number of channels (%zu)",
      xnn_operator_type_to_string(xnn_operator_type_softmax_nc_f32), output_stride, channels);
    goto error;
  }

  status = xnn_status_out_of_memory;

  softmax_op = xnn_allocate_zero_simd_memory(sizeof(struct xnn_operator));
  if (softmax_op == NULL) {
    xnn_log_error(
      "failed to allocate %zu bytes for %s operator descriptor",
      sizeof(struct xnn_operator), xnn_operator_type_to_string(xnn_operator_type_softmax_nc_f32));
    goto error;
  }

  softmax_op->channels = channels;
  softmax_op->input_pixel_stride = input_stride;
  softmax_op->output_pixel_stride = output_stride;

  softmax_op->type = xnn_operator_type_softmax_nc_f32;

  // The operator cannot be run until a setup call marks it ready.
  softmax_op->state = xnn_run_state_invalid;

  *softmax_op_out = softmax_op;
  return xnn_status_success;

error:
  // softmax_op may still be NULL here.
  // NOTE(review): relies on xnn_delete_operator (defined elsewhere) accepting
  // NULL -- confirm.
  xnn_delete_operator(softmax_op);
  return status;
}
251 
// Binds input/output buffers and a batch size to an F32 softmax operator and
// prepares its 1D parallel compute descriptor.
//
// Parameters:
//   softmax_op - operator whose type must be xnn_operator_type_softmax_nc_f32;
//                dereferenced without a NULL check.
//   batch_size - number of batch elements; zero marks the operator as "skip".
//   input      - input buffer pointer, stored in the operator.
//   output     - output buffer pointer, stored in the operator.
//   threadpool - not read by this function.
//
// Returns:
//   xnn_status_success           - operator is ready to run, or was marked as
//                                  skip for an empty batch.
//   xnn_status_invalid_parameter - operator type mismatch.
//   xnn_status_uninitialized     - XNNPACK has not been initialized.
enum xnn_status xnn_setup_softmax_nc_f32(
    xnn_operator_t softmax_op,
    size_t batch_size,
    const float* input,
    float* output,
    pthreadpool_t threadpool)
{
  if (softmax_op->type != xnn_operator_type_softmax_nc_f32) {
    xnn_log_error("failed to setup operator: operator type mismatch (expected %s, got %s)",
      xnn_operator_type_to_string(xnn_operator_type_softmax_nc_f32),
      xnn_operator_type_to_string(softmax_op->type));
    return xnn_status_invalid_parameter;
  }
  // Invalidate first so that a failed setup never leaves a stale "ready" state.
  softmax_op->state = xnn_run_state_invalid;

  if ((xnn_params.init_flags & XNN_INIT_FLAG_XNNPACK) == 0) {
    xnn_log_error("failed to setup %s operator: XNNPACK is not initialized",
      xnn_operator_type_to_string(xnn_operator_type_softmax_nc_f32));
    return xnn_status_uninitialized;
  }

  if (batch_size == 0) {
    // Nothing to compute: the buffers and compute descriptor are not updated.
    softmax_op->state = xnn_run_state_skip;
    return xnn_status_success;
  }

  softmax_op->batch_size = batch_size;
  softmax_op->input = input;
  softmax_op->output = output;

  // The channel count and both strides are converted from elements to bytes.
  softmax_op->context.f32_three_pass_softmax = (struct f32_three_pass_softmax_context) {
    .n = softmax_op->channels * sizeof(float),
    .x = input,
    .x_stride = softmax_op->input_pixel_stride * sizeof(float),
    .y = output,
    .y_stride = softmax_op->output_pixel_stride * sizeof(float),
    .rmax_ukernel = xnn_params.f32.rmax,
    .raddstoreexpminusmax_ukernel = xnn_params.f32.raddstoreexpminusmax,
    .vmulc_ukernel = xnn_params.f32.vmul.minmax.opc_ukernel,
    // Infinite bounds are passed to the min/max parameter initializer.
    .params = xnn_init_f32_minmax_params(-INFINITY, INFINITY),
  };
  // One task per batch element.
  softmax_op->compute.type = xnn_parallelization_type_1d;
  softmax_op->compute.task_1d = (pthreadpool_task_1d_t) xnn_compute_f32_three_pass_softmax;
  softmax_op->compute.range[0] = batch_size;
  softmax_op->state = xnn_run_state_ready;

  return xnn_status_success;
}
300