1 /*
2  * Copyright (C) 2013 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include <cstdint>
18 
19 #include "RenderScriptToolkit.h"
20 #include "TaskProcessor.h"
21 #include "Utils.h"
22 
23 #define LOG_TAG "renderscript.toolkit.YuvToRgb"
24 
25 namespace android {
26 namespace renderscript {
27 
/**
 * Rounds val up to the next multiple of 16.
 *
 * Values that are already a multiple of 16 are returned unchanged.
 */
inline size_t roundUpTo16(size_t val) {
    const size_t alignment = 16;
    const size_t blocks = (val + (alignment - 1)) / alignment;
    return blocks * alignment;
}
31 
32 class YuvToRgbTask : public Task {
33     uchar4* mOut;
34     size_t mCstep;
35     size_t mStrideY;
36     size_t mStrideU;
37     size_t mStrideV;
38     const uchar* mInY;
39     const uchar* mInU;
40     const uchar* mInV;
41 
42     void kernel(uchar4* out, uint32_t xstart, uint32_t xend, uint32_t currentY);
43     // Process a 2D tile of the overall work. threadIndex identifies which thread does the work.
44     virtual void processData(int threadIndex, size_t startX, size_t startY, size_t endX,
45                              size_t endY) override;
46 
47    public:
YuvToRgbTask(const uint8_t * input,uint8_t * output,size_t sizeX,size_t sizeY,RenderScriptToolkit::YuvFormat format)48     YuvToRgbTask(const uint8_t* input, uint8_t* output, size_t sizeX, size_t sizeY,
49                  RenderScriptToolkit::YuvFormat format)
50         : Task{sizeX, sizeY, 4, false, nullptr}, mOut{reinterpret_cast<uchar4*>(output)} {
51         switch (format) {
52             case RenderScriptToolkit::YuvFormat::NV21:
53                 mCstep = 2;
54                 mStrideY = sizeX;
55                 mStrideU = mStrideY;
56                 mStrideV = mStrideY;
57                 mInY = reinterpret_cast<const uchar*>(input);
58                 mInV = reinterpret_cast<const uchar*>(input + mStrideY * sizeY);
59                 mInU = mInV + 1;
60                 break;
61             case RenderScriptToolkit::YuvFormat::YV12:
62                 mCstep = 1;
63                 mStrideY = roundUpTo16(sizeX);
64                 mStrideU = roundUpTo16(mStrideY >> 1);
65                 mStrideV = mStrideU;
66                 mInY = reinterpret_cast<const uchar*>(input);
67                 mInU = reinterpret_cast<const uchar*>(input + mStrideY * sizeY);
68                 mInV = mInU + mStrideV * sizeY / 2;
69                 break;
70         }
71     }
72 };
73 
processData(int,size_t startX,size_t startY,size_t endX,size_t endY)74 void YuvToRgbTask::processData(int /* threadIndex */, size_t startX, size_t startY, size_t endX,
75                                size_t endY) {
76     for (size_t y = startY; y < endY; y++) {
77         size_t offset = mSizeX * y + startX;
78         uchar4* out = mOut + offset;
79         kernel(out, startX, endX, y);
80     }
81 }
82 
rsYuvToRGBA_uchar4(uchar y,uchar u,uchar v)83 static uchar4 rsYuvToRGBA_uchar4(uchar y, uchar u, uchar v) {
84     int16_t Y = ((int16_t)y) - 16;
85     int16_t U = ((int16_t)u) - 128;
86     int16_t V = ((int16_t)v) - 128;
87 
88     short4 p;
89     p.x = (Y * 298 + V * 409 + 128) >> 8;
90     p.y = (Y * 298 - U * 100 - V * 208 + 128) >> 8;
91     p.z = (Y * 298 + U * 516 + 128) >> 8;
92     p.w = 255;
93     if(p.x < 0) {
94         p.x = 0;
95     }
96     if(p.x > 255) {
97         p.x = 255;
98     }
99     if(p.y < 0) {
100         p.y = 0;
101     }
102     if(p.y > 255) {
103         p.y = 255;
104     }
105     if(p.z < 0) {
106         p.z = 0;
107     }
108     if(p.z > 255) {
109         p.z = 255;
110     }
111 
112     return (uchar4){static_cast<uchar>(p.x), static_cast<uchar>(p.y),
113                     static_cast<uchar>(p.z), static_cast<uchar>(p.w)};
114 }
115 
// Row-conversion routines with C linkage; their implementations are not part of this file.
// They are only called from YuvToRgbTask::kernel() when ARCH_ARM_USE_INTRINSICS is defined
// and mUsesSimd is set.

// Called for interleaved chroma (mCstep == 2) when the U pointer is one byte after the V
// pointer; the V pointer is passed as uv.
extern "C" void rsdIntrinsicYuv_K(void *dst, const uchar *Y, const uchar *uv, uint32_t xstart,
                                  size_t xend);
// Called for interleaved chroma (mCstep == 2) when the U pointer is one byte before the V
// pointer; the U pointer is passed as uv.
extern "C" void rsdIntrinsicYuvR_K(void *dst, const uchar *Y, const uchar *uv, uint32_t xstart,
                                   size_t xend);
// Called when mCstep == 1; takes separate U and V row pointers.
extern "C" void rsdIntrinsicYuv2_K(void *dst, const uchar *Y, const uchar *u, const uchar *v,
                                   size_t xstart, size_t xend);
122 
kernel(uchar4 * out,uint32_t xstart,uint32_t xend,uint32_t currentY)123 void YuvToRgbTask::kernel(uchar4 *out, uint32_t xstart, uint32_t xend, uint32_t currentY) {
124     //ALOGI("kernel out %p, xstart=%u, xend=%u, currentY=%u", out, xstart, xend, currentY);
125 
126     const uchar *y = mInY + (currentY * mStrideY);
127     const uchar *v = mInV + ((currentY >> 1) * mStrideV);
128     const uchar *u = mInU + ((currentY >> 1) * mStrideU);
129 
130     //ALOGI("pinY %p, pinV %p, pinU %p", pinY, pinV, pinU);
131 
132     uint32_t x1 = xstart;
133     uint32_t x2 = xend;
134 
135     /*
136     ALOGE("pinY, %p, Y, %p, currentY, %d, strideY, %zu", pinY, y, currentY, mStrideY);
137     ALOGE("pinU, %p, U, %p, currentY, %d, strideU, %zu", pinU, u, currentY, mStrideU);
138     ALOGE("pinV, %p, V, %p, currentY, %d, strideV, %zu", pinV, v, currentY, mStrideV);
139     ALOGE("dimX, %d, dimY, %d", cp->alloc->mHal.drvState.lod[0].dimX,
140           cp->alloc->mHal.drvState.lod[0].dimY);
141     ALOGE("info->dim.x, %d, info->dim.y, %d", info->dim.x, info->dim.y);
142     uchar* pinY = (uchar*)mInY;
143     uchar* pinU = (uchar*)mInU;
144     uchar* pinV = (uchar*)mInV;
145     ALOGE("Y %p %02hhx %02hhx %02hhx %02hhx %02hhx %02hhx %02hhx %02hhx %02hhx "
146           "%02hhx %02hhx %02hhx %02hhx %02hhx %02hhx %02hhx",
147           pinY, pinY[0], pinY[1], pinY[2], pinY[3], pinY[4], pinY[5], pinY[6], pinY[7], pinY[8],
148           pinY[9], pinY[10], pinY[11], pinY[12], pinY[13], pinY[14], pinY[15]);
149     ALOGE("Y %p %02hhx %02hhx %02hhx %02hhx %02hhx %02hhx %02hhx %02hhx %02hhx "
150           "%02hhx %02hhx %02hhx %02hhx %02hhx %02hhx %02hhx",
151           pinY, pinY[16], pinY[17], pinY[18], pinY[19], pinY[20], pinY[21], pinY[22], pinY[23],
152           pinY[24], pinY[25], pinY[26], pinY[27], pinY[28], pinY[29], pinY[30], pinY[31]);
153     ALOGE("Y %p %02hhx %02hhx %02hhx %02hhx %02hhx %02hhx %02hhx %02hhx %02hhx "
154           "%02hhx %02hhx %02hhx %02hhx %02hhx %02hhx %02hhx",
155           pinY, pinY[32], pinY[33], pinY[34], pinY[35], pinY[36], pinY[37], pinY[38], pinY[39],
156           pinY[40], pinY[41], pinY[42], pinY[43], pinY[44], pinY[45], pinY[46], pinY[47]);
157 
158     ALOGE("U %p %02hhx %02hhx %02hhx %02hhx %02hhx %02hhx %02hhx %02hhx %02hhx "
159           "%02hhx %02hhx %02hhx %02hhx %02hhx %02hhx %02hhx",
160           pinU, pinU[0], pinU[1], pinU[2], pinU[3], pinU[4], pinU[5], pinU[6], pinU[7], pinU[8],
161           pinU[9], pinU[10], pinU[11], pinU[12], pinU[13], pinU[14], pinU[15]);
162     ALOGE("U %p %02hhx %02hhx %02hhx %02hhx %02hhx %02hhx %02hhx %02hhx %02hhx "
163           "%02hhx %02hhx %02hhx %02hhx %02hhx %02hhx %02hhx",
164           pinU, pinU[16], pinU[17], pinU[18], pinU[19], pinU[20], pinU[21], pinU[22], pinU[23],
165           pinU[24], pinU[25], pinU[26], pinU[27], pinU[28], pinU[29], pinU[30], pinU[31]);
166     ALOGE("U %p %02hhx %02hhx %02hhx %02hhx %02hhx %02hhx %02hhx %02hhx %02hhx "
167           "%02hhx %02hhx %02hhx %02hhx %02hhx %02hhx %02hhx",
168           pinU, pinU[32], pinU[33], pinU[34], pinU[35], pinU[36], pinU[37], pinU[38], pinU[39],
169           pinU[40], pinU[41], pinU[42], pinU[43], pinU[44], pinU[45], pinU[46], pinU[47]);
170 
171     ALOGE("V %p %02hhx %02hhx %02hhx %02hhx %02hhx %02hhx %02hhx %02hhx %02hhx "
172           "%02hhx %02hhx %02hhx %02hhx %02hhx %02hhx %02hhx",
173           pinV, pinV[0], pinV[1], pinV[2], pinV[3], pinV[4], pinV[5], pinV[6], pinV[7], pinV[8],
174           pinV[9], pinV[10], pinV[11], pinV[12], pinV[13], pinV[14], pinV[15]);
175     ALOGE("V %p %02hhx %02hhx %02hhx %02hhx %02hhx %02hhx %02hhx %02hhx %02hhx "
176           "%02hhx %02hhx %02hhx %02hhx %02hhx %02hhx %02hhx",
177           pinV, pinV[16], pinV[17], pinV[18], pinV[19], pinV[20], pinV[21], pinV[22], pinV[23],
178           pinV[24], pinV[25], pinV[26], pinV[27], pinV[28], pinV[29], pinV[30], pinV[31]);
179     ALOGE("V %p %02hhx %02hhx %02hhx %02hhx %02hhx %02hhx %02hhx %02hhx %02hhx "
180           "%02hhx %02hhx %02hhx %02hhx %02hhx %02hhx %02hhx",
181           pinV, pinV[32], pinV[33], pinV[34], pinV[35], pinV[36], pinV[37], pinV[38], pinV[39],
182           pinV[40], pinV[41], pinV[42], pinV[43], pinV[44], pinV[45], pinV[46], pinV[47]);
183     */
184 
185     /* If we start on an odd pixel then deal with it here and bump things along
186      * so that subsequent code can carry on with even-odd pairing assumptions.
187      */
188     if((x1 & 1) && (x2 > x1)) {
189         int cx = (x1 >> 1) * mCstep;
190         *out = rsYuvToRGBA_uchar4(y[x1], u[cx], v[cx]);
191         out++;
192         x1++;
193     }
194 
195 #if defined(ARCH_ARM_USE_INTRINSICS)
196     if((x2 > x1) && mUsesSimd) {
197         int32_t len = x2 - x1;
198         if (mCstep == 1) {
199             rsdIntrinsicYuv2_K(out, y, u, v, x1, x2);
200             x1 += len;
201             out += len;
202         } else if (mCstep == 2) {
203             // Check for proper interleave
204             intptr_t ipu = (intptr_t)u;
205             intptr_t ipv = (intptr_t)v;
206 
207             if (ipu == (ipv + 1)) {
208                 rsdIntrinsicYuv_K(out, y, v, x1, x2);
209                 x1 += len;
210                 out += len;
211             } else if (ipu == (ipv - 1)) {
212                 rsdIntrinsicYuvR_K(out, y, u, x1, x2);
213                 x1 += len;
214                 out += len;
215             }
216         }
217     }
218 #endif
219 
220     if(x2 > x1) {
221        // ALOGE("y %i  %i  %i", currentY, x1, x2);
222         while(x1 < x2) {
223             int cx = (x1 >> 1) * mCstep;
224             *out = rsYuvToRGBA_uchar4(y[x1], u[cx], v[cx]);
225             out++;
226             x1++;
227             *out = rsYuvToRGBA_uchar4(y[x1], u[cx], v[cx]);
228             out++;
229             x1++;
230         }
231     }
232 }
233 
yuvToRgb(const uint8_t * input,uint8_t * output,size_t sizeX,size_t sizeY,YuvFormat format)234 void RenderScriptToolkit::yuvToRgb(const uint8_t* input, uint8_t* output, size_t sizeX,
235                                    size_t sizeY, YuvFormat format) {
236     YuvToRgbTask task(input, output, sizeX, sizeY, format);
237     processor->doTask(&task);
238 }
239 
240 }  // namespace renderscript
241 }  // namespace android
242