1 /*
2  * Copyright (C) 2021 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef ANDROID_RENDERSCRIPT_TOOLKIT_TOOLKIT_H
18 #define ANDROID_RENDERSCRIPT_TOOLKIT_TOOLKIT_H
19 
#include <cstddef>
#include <cstdint>
#include <memory>
22 
23 namespace android {
24 namespace renderscript {
25 
26 class TaskProcessor;
27 
/**
 * Defines the range of data to process.
 *
 * This struct is used to restrict a Toolkit operation to a rectangular subset of a buffer.
 * Each axis is described by a half-open range: the start index is included, the end index is
 * the index after the last included value.
 */
struct Restriction {
    /** The index of the first value to be included on the X axis. */
    size_t startX;
    /** The index after the last value to be included on the X axis. */
    size_t endX;
    /** The index of the first value to be included on the Y axis. */
    size_t startY;
    /** The index after the last value to be included on the Y axis. */
    size_t endY;
};
45 
46 /**
47  * A collection of high-performance graphic utility functions like blur and blend.
48  *
49  * This toolkit provides ten image manipulation functions: blend, blur, color matrix, convolve,
50  * histogram, histogramDot, lut, lut3d, resize, and YUV to RGB. These functions execute
51  * multithreaded on the CPU.
52  *
53  * These functions work over raw byte arrays. You'll need to specify the width and height of
54  * the data to be processed, as well as the number of bytes per pixel. For most use cases,
55  * this will be 4.
56  *
57  * You should instantiate the Toolkit once and reuse it throughout your application.
58  * On instantiation, the Toolkit creates a thread pool that's used for processing all the functions.
59  * You can limit the number of pool threads used by the Toolkit via the constructor. The pool
60  * threads are destroyed once the Toolkit is destroyed, after any pending work is done.
61  *
62  * This library is thread safe. You can call methods from different pool threads. The functions will
63  * execute sequentially.
64  *
65  * A Java/Kotlin Toolkit is available. It calls this library through JNI.
66  *
67  * This toolkit can be used as a replacement for most RenderScript Intrinsic functions. Compared
68  * to RenderScript, it's simpler to use and more than twice as fast on the CPU. However RenderScript
69  * Intrinsics allow more flexibility for the type of allocation supported. In particular, this
70  * toolkit does not support allocations of floats.
71  */
72 class RenderScriptToolkit {
73     /** Each Toolkit method call is converted to a Task. The processor owns the thread pool. It
74      * tiles the tasks and schedule them over the pool threads.
75      */
76     std::unique_ptr<TaskProcessor> processor;
77 
78    public:
79     /**
80      * Creates the pool threads that are used for processing the method calls.
81      */
82     RenderScriptToolkit(int numberOfThreads = 0);
83     /**
84      * Destroys the thread pool. This stops any in-progress work; the Toolkit methods called from
85      * other pool threads will return without having completed the work. Because of the undefined
86      * state of the output buffers, an application should avoid destroying the Toolkit if other pool
87      * threads are executing Toolkit methods.
88      */
89     ~RenderScriptToolkit();
90 
91     /**
92      * Determines how a source buffer is blended into a destination buffer.
93      *
94      * See {@link RenderScriptToolkit::blend}.
95      *
96      * blend only works on 4 byte RGBA data. In the descriptions below, ".a" represents
97      * the alpha channel.
98      */
99     enum class BlendingMode {
100         /**
101          * dest = 0
102          *
103          * The destination is cleared, i.e. each pixel is set to (0, 0, 0, 0)
104          */
105         CLEAR = 0,
106         /**
107          * dest = src
108          *
109          * Sets each pixel of the destination to the corresponding one in the source.
110          */
111         SRC = 1,
112         /**
113          * dest = dest
114          *
115          * Leaves the destination untouched. This is a no-op.
116          */
117         DST = 2,
118         /**
119          * dest = src + dest * (1.0 - src.a)
120          */
121         SRC_OVER = 3,
122         /**
123          * dest = dest + src * (1.0 - dest.a)
124          */
125         DST_OVER = 4,
126         /**
127          * dest = src * dest.a
128          */
129         SRC_IN = 5,
130         /**
131          * dest = dest * src.a
132          */
133         DST_IN = 6,
134         /**
135          * dest = src * (1.0 - dest.a)
136          */
137         SRC_OUT = 7,
138         /**
139          * dest = dest * (1.0 - src.a)
140          */
141         DST_OUT = 8,
142         /**
143          * dest.rgb = src.rgb * dest.a + (1.0 - src.a) * dest.rgb, dest.a = dest.a
144          */
145         SRC_ATOP = 9,
146         /**
147          * dest = dest.rgb * src.a + (1.0 - dest.a) * src.rgb, dest.a = src.a
148          */
149         DST_ATOP = 10,
150         /**
151          * dest = {src.r ^ dest.r, src.g ^ dest.g, src.b ^ dest.b, src.a ^ dest.a}
152          *
153          * Note: this is NOT the Porter/Duff XOR mode; this is a bitwise xor.
154          */
155         XOR = 11,
156         /**
157          * dest = src * dest
158          */
159         MULTIPLY = 12,
160         /**
161          * dest = min(src + dest, 1.0)
162          */
163         ADD = 13,
164         /**
165          * dest = max(dest - src, 0.0)
166          */
167         SUBTRACT = 14
168     };
169 
170     /**
171      * Blend a source buffer with the destination buffer.
172      *
173      * Blends a source buffer and a destination buffer, placing the result in the destination
174      * buffer. The blending is done pairwise between two corresponding RGBA values found in
175      * each buffer. The mode parameter specifies one of fifteen blending operations.
176      * See {@link BlendingMode}.
177      *
178      * An optional range parameter can be set to restrict the operation to a rectangular subset
179      * of each buffer. If provided, the range must be wholly contained with the dimensions
180      * described by sizeX and sizeY.
181      *
182      * The source and destination buffers must have the same dimensions. Both buffers should be
183      * large enough for sizeX * sizeY * 4 bytes. The buffers have a row-major layout.
184      *
185      * @param mode The specific blending operation to do.
186      * @param source The RGBA input buffer.
187      * @param dest The destination buffer. Used for input and output.
188      * @param sizeX The width of both buffers, as a number of RGBA values.
189      * @param sizeY The height of both buffers, as a number of RGBA values.
190      * @param restriction When not null, restricts the operation to a 2D range of pixels.
191      */
192     void blend(BlendingMode mode, const uint8_t* _Nonnull source, uint8_t* _Nonnull dst,
193                size_t sizeX, size_t sizeY, const Restriction* _Nullable restriction = nullptr);
194 
195     /**
196      * Blur an image.
197      *
198      * Performs a Gaussian blur of the input image and stores the result in the out buffer.
199      *
200      * The radius determines which pixels are used to compute each blurred pixels. This Toolkit
201      * accepts values between 1 and 25. Larger values create a more blurred effect but also
202      * take longer to compute. When the radius extends past the edge, the edge pixel will
203      * be used as replacement for the pixel that's out off boundary.
204      *
205      * Each input pixel can either be represented by four bytes (RGBA format) or one byte
206      * for the less common blurring of alpha channel only image.
207      *
208      * An optional range parameter can be set to restrict the operation to a rectangular subset
209      * of each buffer. If provided, the range must be wholly contained with the dimensions
210      * described by sizeX and sizeY.
211      *
212      * The input and output buffers must have the same dimensions. Both buffers should be
213      * large enough for sizeX * sizeY * vectorSize bytes. The buffers have a row-major layout.
214      *
215      * @param in The buffer of the image to be blurred.
216      * @param out The buffer that receives the blurred image.
217      * @param sizeX The width of both buffers, as a number of 1 or 4 byte cells.
218      * @param sizeY The height of both buffers, as a number of 1 or 4 byte cells.
219      * @param vectorSize Either 1 or 4, the number of bytes in each cell, i.e. A vs. RGBA.
220      * @param radius The radius of the pixels used to blur.
221      * @param restriction When not null, restricts the operation to a 2D range of pixels.
222      */
223     void blur(const uint8_t* _Nonnull in, uint8_t* _Nonnull out, size_t sizeX, size_t sizeY,
224               size_t vectorSize, int radius, const Restriction* _Nullable restriction = nullptr);
225 
226     /**
227      * Identity matrix that can be passed to the {@link RenderScriptToolkit::colorMatrix} method.
228      *
229      * Using this matrix will result in no change to the pixel through multiplication although
230      * the pixel value can still be modified by the add vector, or transformed to a different
231      * format.
232      */
233     static constexpr float kIdentityMatrix[] =  {
234             1.0f, 0.0f, 0.0f, 0.0f,
235             0.0f, 1.0f, 0.0f, 0.0f,
236             0.0f, 0.0f, 1.0f, 0.0f,
237             0.0f, 0.0f, 0.0f, 1.0f
238     };
239 
240     /**
241      * Matrix to turn color pixels to a grey scale.
242      *
243      * Use this matrix with the {@link RenderScriptToolkit::colorMatrix} method to convert an
244      * image from color to greyscale.
245      */
246     static constexpr float kGreyScaleColorMatrix[] = {
247             0.299f, 0.299f, 0.299f, 0.0f,
248             0.587f, 0.587f, 0.587f, 0.0f,
249             0.114f, 0.114f, 0.114f, 0.0f,
250             0.0f,   0.0f,   0.0f,   1.0f
251     };
252 
253     /**
254      * Matrix to convert RGB to YUV.
255      *
256      * Use this matrix with the {@link RenderScriptToolkit::colorMatrix} method to convert the
257      * first three bytes of each pixel from RGB to YUV. This leaves the last byte (the alpha
258      * channel) untouched.
259      *
260      * This is a simplistic conversion. Most YUV buffers have more complicated format, not supported
261      * by this method.
262      */
263     static constexpr float kRgbToYuvMatrix[] = {
264             0.299f, -0.14713f,  0.615f,   0.0f,
265             0.587f, -0.28886f, -0.51499f, 0.0f,
266             0.114f,  0.436f,   -0.10001f, 0.0f,
267             0.0f,    0.0f,      0.0f,     1.0f
268     };
269 
270     /**
271      * Matrix to convert YUV to RGB.
272      *
273      * Use this matrix with the {@link RenderScriptToolkit::colorMatrix} method to convert the
274      * first three bytes of each pixel from YUV to RGB. This leaves the last byte (the alpha
275      * channel) untouched.
276      *
277      * This is a simplistic conversion. Most YUV buffers have more complicated format, not supported
278      * by this method. Use {@link RenderScriptToolkit::yuvToRgb} to convert these buffers.
279      */
280     static constexpr float kYuvToRgbMatrix[] = {
281             1.0f,      1.0f,     1.0f,     0.0f,
282             0.0f,     -0.39465f, 2.03211f, 0.0f,
283             1.13983f, -0.5806f,  0.0f,     0.0f,
284             0.0f,      0.0f,     0.0f,     1.0f
285     };
286 
287     /**
288      * Transform an image using a color matrix.
289      *
290      * Converts a 2D array of vectors of unsigned bytes, multiplying each vectors by a 4x4 matrix
291      * and adding an optional vector.
292      *
293      * Each input vector is composed of 1-4 unsigned bytes. If less than 4 bytes, it's extended to
294      * 4, padding with zeroes. The unsigned bytes are converted from 0-255 to 0.0-1.0 floats
295      * before the multiplication is done.
296      *
297      * The resulting value is normalized from 0.0-1.0 to a 0-255 value and stored in the output.
298      * If the output vector size is less than four, the unused channels are discarded.
299      *
300      * If addVector is null, a vector of zeroes is added, i.e. a noop.
301      *
302      * Check kIdentityMatrix, kGreyScaleColorMatrix, kRgbToYuvMatrix, and kYuvToRgbMatrix for sample
303      * matrices. The YUV conversion may not work for all color spaces.
304      *
305      * @param in The buffer of the image to be converted.
306      * @param out The buffer that receives the converted image.
307      * @param inputVectorSize The number of bytes in each input cell, a value from 1 to 4.
308      * @param outputVectorSize The number of bytes in each output cell, a value from 1 to 4.
309      * @param sizeX The width of both buffers, as a number of 1 to 4 byte cells.
310      * @param sizeY The height of both buffers, as a number of 1 to 4 byte cells.
311      * @param matrix The 4x4 matrix to multiply, in row major format.
312      * @param addVector A vector of four floats that's added to the result of the multiplication.
313      * @param restriction When not null, restricts the operation to a 2D range of pixels.
314      */
315     void colorMatrix(const void* _Nonnull in, void* _Nonnull out, size_t inputVectorSize,
316                      size_t outputVectorSize, size_t sizeX, size_t sizeY,
317                      const float* _Nonnull matrix, const float* _Nullable addVector = nullptr,
318                      const Restriction* _Nullable restriction = nullptr);
319 
320     /**
321      * Convolve a ByteArray.
322      *
323      * Applies a 3x3 or 5x5 convolution to the input array using the provided coefficients.
324      *
325      * For 3x3 convolutions, 9 coefficients must be provided. For 5x5, 25 coefficients are needed.
326      * The coefficients should be provided in row-major format.
327      *
328      * When the square extends past the edge, the edge values will be used as replacement for the
329      * values that's are off boundary.
330      *
331      * Each input cell can either be represented by one to four bytes. Each byte is multiplied
332      * and accumulated independently of the other bytes of the cell.
333      *
334      * An optional range parameter can be set to restrict the operation to a rectangular subset
335      * of each buffer. If provided, the range must be wholly contained with the dimensions
336      * described by sizeX and sizeY.
337      *
338      * The input and output buffers must have the same dimensions. Both buffers should be
339      * large enough for sizeX * sizeY * vectorSize bytes. The buffers have a row-major layout.
340      *
341      * @param in The buffer of the image to be blurred.
342      * @param out The buffer that receives the blurred image.
343      * @param vectorSize The number of bytes in each cell, a value from 1 to 4.
344      * @param sizeX The width of both buffers, as a number of 1 or 4 byte cells.
345      * @param sizeY The height of both buffers, as a number of 1 or 4 byte cells.
346      * @param coefficients 9 or 25 multipliers.
347      * @param restriction When not null, restricts the operation to a 2D range of pixels.
348      */
349     void convolve3x3(const void* _Nonnull in, void* _Nonnull out, size_t vectorSize, size_t sizeX,
350                      size_t sizeY, const float* _Nonnull coefficients,
351                      const Restriction* _Nullable restriction = nullptr);
352 
353     void convolve5x5(const void* _Nonnull in, void* _Nonnull out, size_t vectorSize, size_t sizeX,
354                      size_t sizeY, const float* _Nonnull coefficients,
355                      const Restriction* _Nullable restriction = nullptr);
356 
357     /**
358      * Compute the histogram of an image.
359      *
360      * Tallies how many times each of the 256 possible values of a byte is found in the input.
361      *
362      * An input cell can be represented by one to four bytes. The tally is done independently
363      * for each of the bytes of the cell. Correspondingly, the out array will have
364      * 256 * vectorSize entries. The counts for value 0 are consecutive, followed by those for
365      * value 1, etc.
366      *
367      * An optional range parameter can be set to restrict the operation to a rectangular subset
368      * of each buffer. If provided, the range must be wholly contained with the dimensions
369      * described by sizeX and sizeY.
370      *
371      * The source buffers should be large enough for sizeX * sizeY * vectorSize bytes. The buffers
372      * have a row-major layout. The out buffer should be large enough for 256 * vectorSize ints.
373      *
374      * @param in The buffer of the image to be analyzed.
375      * @param out The resulting vector of counts.
376      * @param sizeX The width of the input buffers, as a number of 1 or 4 byte cells.
377      * @param sizeY The height of the input buffers, as a number of 1 or 4 byte cells.
378      * @param vectorSize The number of bytes in each cell, a value from 1 to 4.
379      * @param restriction When not null, restricts the operation to a 2D range of pixels.
380      */
381     void histogram(const uint8_t* _Nonnull in, int32_t* _Nonnull out, size_t sizeX, size_t sizeY,
382                    size_t vectorSize, const Restriction* _Nullable restriction = nullptr);
383 
384     /**
385      * Compute the histogram of the dot product of an image.
386      *
387      * This method supports cells of 1 to 4 bytes in length. For each cell of the array,
388      * the dot product of its bytes with the provided coefficients is computed. The resulting
389      * floating point value is converted to an unsigned byte and tallied in the histogram.
390      *
391      * If coefficients is null, the coefficients used for RGBA luminosity calculation will be used,
392      * i.e. the values [0.299f, 0.587f, 0.114f, 0.f].
393      *
394      * Each coefficients must be >= 0 and their sum must be 1.0 or less. There must be the same
395      * number of coefficients as vectorSize.
396      *
397      * An optional range parameter can be set to restrict the operation to a rectangular subset
398      * of each buffer. If provided, the range must be wholly contained with the dimensions
399      * described by sizeX and sizeY.
400      *
401      * The source buffers should be large enough for sizeX * sizeY * vectorSize bytes. The buffers
402      * have a row-major layout. The out array should be large enough for 256 ints.
403      *
404      * @param in The buffer of the image to be analyzed.
405      * @param out The resulting vector of counts.
406      * @param sizeX The width of the input buffers, as a number of 1 or 4 byte cells.
407      * @param sizeY The height of the input buffers, as a number of 1 or 4 byte cells.
408      * @param vectorSize The number of bytes in each cell, a value from 1 to 4.
409      * @param coefficients The values used for the dot product. Can be nullptr.
410      * @param restriction When not null, restricts the operation to a 2D range of pixels.
411      */
412     void histogramDot(const uint8_t* _Nonnull in, int32_t* _Nonnull out, size_t sizeX, size_t sizeY,
413                       size_t vectorSize, const float* _Nullable coefficients,
414                       const Restriction* _Nullable restriction = nullptr);
415 
416     /**
417      * Transform an image using a look up table
418      *
419      * Transforms an image by using a per-channel lookup table. Each channel of the input has an
420      * independent lookup table. The tables are 256 entries in size and can cover the full value
421      * range of a byte.
422      *
423      * The input array should be in RGBA format, where four consecutive bytes form an cell.
424      *
425      * An optional range parameter can be set to restrict the operation to a rectangular subset
426      * of each buffer. If provided, the range must be wholly contained with the dimensions
427      * described by sizeX and sizeY.
428      *
429      * The input and output buffers must have the same dimensions. Both buffers should be
430      * large enough for sizeX * sizeY * vectorSize bytes. The buffers have a row-major layout.
431      *
432      * @param in The buffer of the image to be transformed.
433      * @param out The buffer that receives the transformed image.
434      * @param sizeX The width of both buffers, as a number of 4 byte cells.
435      * @param sizeY The height of both buffers, as a number of 4 byte cells.
436      * @param red An array of 256 values that's used to convert the R channel.
437      * @param green An array of 256 values that's used to convert the G channel.
438      * @param blue An array of 256 values that's used to convert the B channel.
439      * @param alpha An array of 256 values that's used to convert the A channel.
440      * @param restriction When not null, restricts the operation to a 2D range of pixels.
441      */
442     void lut(const uint8_t* _Nonnull in, uint8_t* _Nonnull out, size_t sizeX, size_t sizeY,
443              const uint8_t* _Nonnull red, const uint8_t* _Nonnull green,
444              const uint8_t* _Nonnull blue, const uint8_t* _Nonnull alpha,
445              const Restriction* _Nullable restriction = nullptr);
446 
447     /**
448      * Transform an image using a 3D look up table
449      *
450      * Transforms an image, converting RGB to RGBA by using a 3D lookup table. The incoming R, G,
451      * and B values are normalized to the dimensions of the provided 3D buffer. The eight nearest
452      * values in that 3D buffer are sampled and linearly interpolated. The resulting RGBA entry
453      * is stored in the output.
454      *
455      * The input array should be in RGBA format, where four consecutive bytes form an cell.
456      * The fourth byte of each input cell is ignored.
457      *
458      * An optional range parameter can be set to restrict the operation to a rectangular subset
459      * of each buffer. If provided, the range must be wholly contained with the dimensions
460      * described by sizeX and sizeY.
461      *
462      * The input and output buffers must have the same dimensions. Both buffers should be
463      * large enough for sizeX * sizeY * vectorSize bytes. The buffers have a row-major layout.
464      *
465      * @param in The buffer of the image to be transformed.
466      * @param out The buffer that receives the transformed image.
467      * @param sizeX The width of both buffers, as a number of 4 byte cells.
468      * @param sizeY The height of both buffers, as a number of 4 byte cells.
469      * @param cube The translation cube, in row major-format.
470      * @param cubeSizeX The number of RGBA entries in the cube in the X direction.
471      * @param cubeSizeY The number of RGBA entries in the cube in the Y direction.
472      * @param cubeSizeZ The number of RGBA entries in the cube in the Z direction.
473      * @param restriction When not null, restricts the operation to a 2D range of pixels.
474      */
475     void lut3d(const uint8_t* _Nonnull in, uint8_t* _Nonnull out, size_t sizeX, size_t sizeY,
476                const uint8_t* _Nonnull cube, size_t cubeSizeX, size_t cubeSizeY, size_t cubeSizeZ,
477                const Restriction* _Nullable restriction = nullptr);
478 
479     /**
480      * Resize an image.
481      *
482      * Resizes an image using bicubic interpolation.
483      *
484      * This method supports cells of 1 to 4 bytes in length. Each byte of the cell is
485      * interpolated independently from the others.
486      *
487      * An optional range parameter can be set to restrict the operation to a rectangular subset
488      * of the output buffer. The corresponding scaled range of the input will be used.  If provided,
489      * the range must be wholly contained with the dimensions described by outputSizeX and
490      * outputSizeY.
491      *
492      * The input and output buffers have a row-major layout. Both buffers should be
493      * large enough for sizeX * sizeY * vectorSize bytes.
494      *
495      * @param in The buffer of the image to be resized.
496      * @param out The buffer that receives the resized image.
497      * @param inputSizeX The width of the input buffer, as a number of 1-4 byte cells.
498      * @param inputSizeY The height of the input buffer, as a number of 1-4 byte cells.
499      * @param vectorSize The number of bytes in each cell of both buffers. A value from 1 to 4.
500      * @param outputSizeX The width of the output buffer, as a number of 1-4 byte cells.
501      * @param outputSizeY The height of the output buffer, as a number of 1-4 byte cells.
502      * @param restriction When not null, restricts the operation to a 2D range of pixels.
503      */
504     void resize(const uint8_t* _Nonnull in, uint8_t* _Nonnull out, size_t inputSizeX,
505                 size_t inputSizeY, size_t vectorSize, size_t outputSizeX, size_t outputSizeY,
506                 const Restriction* _Nullable restriction = nullptr);
507 
508     /**
509      * The YUV formats supported by yuvToRgb.
510      */
511     enum class YuvFormat {
512         NV21 = 0x11,
513         YV12 = 0x32315659,
514     };
515 
516     /**
517      * Convert an image from YUV to RGB.
518      *
519      * Converts an Android YUV buffer to RGB. The input allocation should be
520      * supplied in a supported YUV format as a YUV cell Allocation.
521      * The output is RGBA; the alpha channel will be set to 255.
522      *
523      * Note that for YV12 and a sizeX that's not a multiple of 32, the
524      * RenderScript Intrinsic may not have converted the image correctly.
525      * This Toolkit method should.
526      *
527      * @param in The buffer of the image to be converted.
528      * @param out The buffer that receives the converted image.
529      * @param sizeX The width in pixels of the image. Must be even.
530      * @param sizeY The height in pixels of the image.
531      * @param format Either YV12 or NV21.
532      */
533     void yuvToRgb(const uint8_t* _Nonnull in, uint8_t* _Nonnull out, size_t sizeX, size_t sizeY,
534                   YuvFormat format);
535 };
536 
537 }  // namespace renderscript
538 }  // namespace android
539 
540 #endif  // ANDROID_RENDERSCRIPT_TOOLKIT_TOOLKIT_H
541