1 /*
2  * Copyright (C) 2012 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 
18 #include "rsCpuIntrinsic.h"
19 #include "rsCpuIntrinsicInlines.h"
20 
21 using namespace android;
22 using namespace android::renderscript;
23 
24 namespace android {
25 namespace renderscript {
26 
27 
28 class RsdCpuScriptIntrinsicBlend : public RsdCpuScriptIntrinsic {
29 public:
30     virtual void populateScript(Script *);
31 
32     virtual ~RsdCpuScriptIntrinsicBlend();
33     RsdCpuScriptIntrinsicBlend(RsdCpuReferenceImpl *ctx, const Script *s, const Element *e);
34 
35 protected:
36     static void kernel(const RsForEachStubParamStruct *p,
37                           uint32_t xstart, uint32_t xend,
38                           uint32_t instep, uint32_t outstep);
39 };
40 
41 }
42 }
43 
44 
// Blend operation selectors. The kernel switches on p->slot using these
// values, so the numbering must stay exactly as listed.
enum {
    // Slots 0-11: all of these have an implementation in the kernel
    // (BLEND_DST is a no-op there).
    BLEND_CLEAR               = 0,
    BLEND_SRC                 = 1,
    BLEND_DST                 = 2,
    BLEND_SRC_OVER            = 3,
    BLEND_DST_OVER            = 4,
    BLEND_SRC_IN              = 5,
    BLEND_DST_IN              = 6,
    BLEND_SRC_OUT             = 7,
    BLEND_DST_OUT             = 8,
    BLEND_SRC_ATOP            = 9,
    BLEND_DST_ATOP            = 10,
    BLEND_XOR                 = 11,

    // Slots 12-43: only BLEND_MULTIPLY, BLEND_ADD and BLEND_SUBTRACT are
    // implemented by the kernel; the others log an error and assert.
    BLEND_NORMAL              = 12,
    BLEND_AVERAGE             = 13,
    BLEND_MULTIPLY            = 14,
    BLEND_SCREEN              = 15,
    BLEND_DARKEN              = 16,
    BLEND_LIGHTEN             = 17,
    BLEND_OVERLAY             = 18,
    BLEND_HARDLIGHT           = 19,
    BLEND_SOFTLIGHT           = 20,
    BLEND_DIFFERENCE          = 21,
    BLEND_NEGATION            = 22,
    BLEND_EXCLUSION           = 23,
    BLEND_COLOR_DODGE         = 24,
    BLEND_INVERSE_COLOR_DODGE = 25,
    BLEND_SOFT_DODGE          = 26,
    BLEND_COLOR_BURN          = 27,
    BLEND_INVERSE_COLOR_BURN  = 28,
    BLEND_SOFT_BURN           = 29,
    BLEND_REFLECT             = 30,
    BLEND_GLOW                = 31,
    BLEND_FREEZE              = 32,
    BLEND_HEAT                = 33,
    BLEND_ADD                 = 34,
    BLEND_SUBTRACT            = 35,
    BLEND_STAMP               = 36,
    BLEND_RED                 = 37,
    BLEND_GREEN               = 38,
    BLEND_BLUE                = 39,
    BLEND_HUE                 = 40,
    BLEND_SATURATION          = 41,
    BLEND_COLOR               = 42,
    BLEND_LUMINOSITY          = 43
};
92 
#if defined(ARCH_ARM_USE_INTRINSICS)
// Externally defined ARM blend routine. The kernel hands it the whole span
// together with the slot number and treats a return value >= 0 as "handled";
// a negative return makes the kernel fall back to its C++ loops.
extern "C" int rsdIntrinsicBlend_K(uchar4 *out, uchar4 const *in, int slot,
                                   uint32_t xstart, uint32_t xend);
#endif

#if defined(ARCH_X86_HAVE_SSSE3)
// Externally defined SSSE3 blend routines, one per accelerated mode. The
// kernel passes count8 = (number of remaining pixels) >> 3, i.e. the number
// of whole 8-pixel groups, and handles any leftover pixels itself.
extern "C" {
void rsdIntrinsicBlendSrcOver_K(void *dst, const void *src, uint32_t count8);
void rsdIntrinsicBlendDstOver_K(void *dst, const void *src, uint32_t count8);
void rsdIntrinsicBlendSrcIn_K(void *dst, const void *src, uint32_t count8);
void rsdIntrinsicBlendDstIn_K(void *dst, const void *src, uint32_t count8);
void rsdIntrinsicBlendSrcOut_K(void *dst, const void *src, uint32_t count8);
void rsdIntrinsicBlendDstOut_K(void *dst, const void *src, uint32_t count8);
void rsdIntrinsicBlendSrcAtop_K(void *dst, const void *src, uint32_t count8);
void rsdIntrinsicBlendDstAtop_K(void *dst, const void *src, uint32_t count8);
void rsdIntrinsicBlendXor_K(void *dst, const void *src, uint32_t count8);
void rsdIntrinsicBlendMultiply_K(void *dst, const void *src, uint32_t count8);
void rsdIntrinsicBlendAdd_K(void *dst, const void *src, uint32_t count8);
void rsdIntrinsicBlendSub_K(void *dst, const void *src, uint32_t count8);
}
#endif
112 
kernel(const RsForEachStubParamStruct * p,uint32_t xstart,uint32_t xend,uint32_t instep,uint32_t outstep)113 void RsdCpuScriptIntrinsicBlend::kernel(const RsForEachStubParamStruct *p,
114                                         uint32_t xstart, uint32_t xend,
115                                         uint32_t instep, uint32_t outstep) {
116     RsdCpuScriptIntrinsicBlend *cp = (RsdCpuScriptIntrinsicBlend *)p->usr;
117 
118     // instep/outstep can be ignored--sizeof(uchar4) known at compile time
119     uchar4 *out = (uchar4 *)p->out;
120     uchar4 *in = (uchar4 *)p->in;
121     uint32_t x1 = xstart;
122     uint32_t x2 = xend;
123 
124 #if defined(ARCH_ARM_USE_INTRINSICS) && !defined(ARCH_ARM64_USE_INTRINSICS)
125     if (gArchUseSIMD) {
126         if (rsdIntrinsicBlend_K(out, in, p->slot, x1, x2) >= 0)
127             return;
128     }
129 #endif
130     switch (p->slot) {
131     case BLEND_CLEAR:
132         for (;x1 < x2; x1++, out++) {
133             *out = 0;
134         }
135         break;
136     case BLEND_SRC:
137         for (;x1 < x2; x1++, out++, in++) {
138           *out = *in;
139         }
140         break;
141     //BLEND_DST is a NOP
142     case BLEND_DST:
143         break;
144     case BLEND_SRC_OVER:
145     #if defined(ARCH_X86_HAVE_SSSE3)
146         if (gArchUseSIMD) {
147             if ((x1 + 8) < x2) {
148                 uint32_t len = (x2 - x1) >> 3;
149                 rsdIntrinsicBlendSrcOver_K(out, in, len);
150                 x1 += len << 3;
151                 out += len << 3;
152                 in += len << 3;
153             }
154         }
155     #endif
156         for (;x1 < x2; x1++, out++, in++) {
157             short4 in_s = convert_short4(*in);
158             short4 out_s = convert_short4(*out);
159             in_s = in_s + ((out_s * (short4)(255 - in_s.w)) >> (short4)8);
160             *out = convert_uchar4(in_s);
161         }
162         break;
163     case BLEND_DST_OVER:
164     #if defined(ARCH_X86_HAVE_SSSE3)
165         if (gArchUseSIMD) {
166             if ((x1 + 8) < x2) {
167                 uint32_t len = (x2 - x1) >> 3;
168                 rsdIntrinsicBlendDstOver_K(out, in, len);
169                 x1 += len << 3;
170                 out += len << 3;
171                 in += len << 3;
172             }
173         }
174      #endif
175         for (;x1 < x2; x1++, out++, in++) {
176             short4 in_s = convert_short4(*in);
177             short4 out_s = convert_short4(*out);
178             in_s = out_s + ((in_s * (short4)(255 - out_s.w)) >> (short4)8);
179             *out = convert_uchar4(in_s);
180         }
181         break;
182     case BLEND_SRC_IN:
183     #if defined(ARCH_X86_HAVE_SSSE3)
184         if (gArchUseSIMD) {
185             if ((x1 + 8) < x2) {
186                 uint32_t len = (x2 - x1) >> 3;
187                 rsdIntrinsicBlendSrcIn_K(out, in, len);
188                 x1 += len << 3;
189                 out += len << 3;
190                 in += len << 3;
191             }
192         }
193     #endif
194         for (;x1 < x2; x1++, out++, in++) {
195             short4 in_s = convert_short4(*in);
196             in_s = (in_s * out->w) >> (short4)8;
197             *out = convert_uchar4(in_s);
198         }
199         break;
200     case BLEND_DST_IN:
201     #if defined(ARCH_X86_HAVE_SSSE3)
202         if (gArchUseSIMD) {
203             if ((x1 + 8) < x2) {
204                 uint32_t len = (x2 - x1) >> 3;
205                 rsdIntrinsicBlendDstIn_K(out, in, len);
206                 x1 += len << 3;
207                 out += len << 3;
208                 in += len << 3;
209             }
210         }
211      #endif
212         for (;x1 < x2; x1++, out++, in++) {
213             short4 out_s = convert_short4(*out);
214             out_s = (out_s * in->w) >> (short4)8;
215             *out = convert_uchar4(out_s);
216         }
217         break;
218     case BLEND_SRC_OUT:
219     #if defined(ARCH_X86_HAVE_SSSE3)
220         if (gArchUseSIMD) {
221             if ((x1 + 8) < x2) {
222                 uint32_t len = (x2 - x1) >> 3;
223                 rsdIntrinsicBlendSrcOut_K(out, in, len);
224                 x1 += len << 3;
225                 out += len << 3;
226                 in += len << 3;
227             }
228         }
229     #endif
230         for (;x1 < x2; x1++, out++, in++) {
231             short4 in_s = convert_short4(*in);
232             in_s = (in_s * (short4)(255 - out->w)) >> (short4)8;
233             *out = convert_uchar4(in_s);
234         }
235         break;
236     case BLEND_DST_OUT:
237     #if defined(ARCH_X86_HAVE_SSSE3)
238         if (gArchUseSIMD) {
239             if ((x1 + 8) < x2) {
240                 uint32_t len = (x2 - x1) >> 3;
241                 rsdIntrinsicBlendDstOut_K(out, in, len);
242                 x1 += len << 3;
243                 out += len << 3;
244                 in += len << 3;
245             }
246         }
247     #endif
248         for (;x1 < x2; x1++, out++, in++) {
249             short4 out_s = convert_short4(*out);
250             out_s = (out_s * (short4)(255 - in->w)) >> (short4)8;
251             *out = convert_uchar4(out_s);
252         }
253         break;
254     case BLEND_SRC_ATOP:
255     #if defined(ARCH_X86_HAVE_SSSE3)
256         if (gArchUseSIMD) {
257             if ((x1 + 8) < x2) {
258                 uint32_t len = (x2 - x1) >> 3;
259                 rsdIntrinsicBlendSrcAtop_K(out, in, len);
260                 x1 += len << 3;
261                 out += len << 3;
262                 in += len << 3;
263             }
264         }
265     #endif
266         for (;x1 < x2; x1++, out++, in++) {
267             short4 in_s = convert_short4(*in);
268             short4 out_s = convert_short4(*out);
269             out_s.xyz = ((in_s.xyz * out_s.w) +
270               (out_s.xyz * ((short3)255 - (short3)in_s.w))) >> (short3)8;
271             *out = convert_uchar4(out_s);
272         }
273         break;
274     case BLEND_DST_ATOP:
275     #if defined(ARCH_X86_HAVE_SSSE3)
276         if (gArchUseSIMD) {
277             if ((x1 + 8) < x2) {
278                 uint32_t len = (x2 - x1) >> 3;
279                 rsdIntrinsicBlendDstAtop_K(out, in, len);
280                 x1 += len << 3;
281                 out += len << 3;
282                 in += len << 3;
283             }
284         }
285      #endif
286         for (;x1 < x2; x1++, out++, in++) {
287             short4 in_s = convert_short4(*in);
288             short4 out_s = convert_short4(*out);
289             out_s.xyz = ((out_s.xyz * in_s.w) +
290               (in_s.xyz * ((short3)255 - (short3)out_s.w))) >> (short3)8;
291             *out = convert_uchar4(out_s);
292         }
293         break;
294     case BLEND_XOR:
295     #if defined(ARCH_X86_HAVE_SSSE3)
296         if (gArchUseSIMD) {
297             if ((x1 + 8) < x2) {
298                 uint32_t len = (x2 - x1) >> 3;
299                 rsdIntrinsicBlendXor_K(out, in, len);
300                 x1 += len << 3;
301                 out += len << 3;
302                 in += len << 3;
303             }
304         }
305     #endif
306         for (;x1 < x2; x1++, out++, in++) {
307             *out = *in ^ *out;
308         }
309         break;
310     case BLEND_NORMAL:
311         ALOGE("Called unimplemented blend intrinsic BLEND_NORMAL");
312         rsAssert(false);
313         break;
314     case BLEND_AVERAGE:
315         ALOGE("Called unimplemented blend intrinsic BLEND_AVERAGE");
316         rsAssert(false);
317         break;
318     case BLEND_MULTIPLY:
319     #if defined(ARCH_X86_HAVE_SSSE3)
320         if (gArchUseSIMD) {
321             if ((x1 + 8) < x2) {
322                 uint32_t len = (x2 - x1) >> 3;
323                 rsdIntrinsicBlendMultiply_K(out, in, len);
324                 x1 += len << 3;
325                 out += len << 3;
326                 in += len << 3;
327             }
328         }
329     #endif
330         for (;x1 < x2; x1++, out++, in++) {
331           *out = convert_uchar4((convert_short4(*in) * convert_short4(*out))
332                                 >> (short4)8);
333         }
334         break;
335     case BLEND_SCREEN:
336         ALOGE("Called unimplemented blend intrinsic BLEND_SCREEN");
337         rsAssert(false);
338         break;
339     case BLEND_DARKEN:
340         ALOGE("Called unimplemented blend intrinsic BLEND_DARKEN");
341         rsAssert(false);
342         break;
343     case BLEND_LIGHTEN:
344         ALOGE("Called unimplemented blend intrinsic BLEND_LIGHTEN");
345         rsAssert(false);
346         break;
347     case BLEND_OVERLAY:
348         ALOGE("Called unimplemented blend intrinsic BLEND_OVERLAY");
349         rsAssert(false);
350         break;
351     case BLEND_HARDLIGHT:
352         ALOGE("Called unimplemented blend intrinsic BLEND_HARDLIGHT");
353         rsAssert(false);
354         break;
355     case BLEND_SOFTLIGHT:
356         ALOGE("Called unimplemented blend intrinsic BLEND_SOFTLIGHT");
357         rsAssert(false);
358         break;
359     case BLEND_DIFFERENCE:
360         ALOGE("Called unimplemented blend intrinsic BLEND_DIFFERENCE");
361         rsAssert(false);
362         break;
363     case BLEND_NEGATION:
364         ALOGE("Called unimplemented blend intrinsic BLEND_NEGATION");
365         rsAssert(false);
366         break;
367     case BLEND_EXCLUSION:
368         ALOGE("Called unimplemented blend intrinsic BLEND_EXCLUSION");
369         rsAssert(false);
370         break;
371     case BLEND_COLOR_DODGE:
372         ALOGE("Called unimplemented blend intrinsic BLEND_COLOR_DODGE");
373         rsAssert(false);
374         break;
375     case BLEND_INVERSE_COLOR_DODGE:
376         ALOGE("Called unimplemented blend intrinsic BLEND_INVERSE_COLOR_DODGE");
377         rsAssert(false);
378         break;
379     case BLEND_SOFT_DODGE:
380         ALOGE("Called unimplemented blend intrinsic BLEND_SOFT_DODGE");
381         rsAssert(false);
382         break;
383     case BLEND_COLOR_BURN:
384         ALOGE("Called unimplemented blend intrinsic BLEND_COLOR_BURN");
385         rsAssert(false);
386         break;
387     case BLEND_INVERSE_COLOR_BURN:
388         ALOGE("Called unimplemented blend intrinsic BLEND_INVERSE_COLOR_BURN");
389         rsAssert(false);
390         break;
391     case BLEND_SOFT_BURN:
392         ALOGE("Called unimplemented blend intrinsic BLEND_SOFT_BURN");
393         rsAssert(false);
394         break;
395     case BLEND_REFLECT:
396         ALOGE("Called unimplemented blend intrinsic BLEND_REFLECT");
397         rsAssert(false);
398         break;
399     case BLEND_GLOW:
400         ALOGE("Called unimplemented blend intrinsic BLEND_GLOW");
401         rsAssert(false);
402         break;
403     case BLEND_FREEZE:
404         ALOGE("Called unimplemented blend intrinsic BLEND_FREEZE");
405         rsAssert(false);
406         break;
407     case BLEND_HEAT:
408         ALOGE("Called unimplemented blend intrinsic BLEND_HEAT");
409         rsAssert(false);
410         break;
411     case BLEND_ADD:
412     #if defined(ARCH_X86_HAVE_SSSE3)
413         if (gArchUseSIMD) {
414             if((x1 + 8) < x2) {
415                 uint32_t len = (x2 - x1) >> 3;
416                 rsdIntrinsicBlendAdd_K(out, in, len);
417                 x1 += len << 3;
418                 out += len << 3;
419                 in += len << 3;
420             }
421         }
422     #endif
423         for (;x1 < x2; x1++, out++, in++) {
424             uint32_t iR = in->x, iG = in->y, iB = in->z, iA = in->w,
425                 oR = out->x, oG = out->y, oB = out->z, oA = out->w;
426             out->x = (oR + iR) > 255 ? 255 : oR + iR;
427             out->y = (oG + iG) > 255 ? 255 : oG + iG;
428             out->z = (oB + iB) > 255 ? 255 : oB + iB;
429             out->w = (oA + iA) > 255 ? 255 : oA + iA;
430         }
431         break;
432     case BLEND_SUBTRACT:
433     #if defined(ARCH_X86_HAVE_SSSE3)
434         if (gArchUseSIMD) {
435             if((x1 + 8) < x2) {
436                 uint32_t len = (x2 - x1) >> 3;
437                 rsdIntrinsicBlendSub_K(out, in, len);
438                 x1 += len << 3;
439                 out += len << 3;
440                 in += len << 3;
441             }
442         }
443     #endif
444         for (;x1 < x2; x1++, out++, in++) {
445             int32_t iR = in->x, iG = in->y, iB = in->z, iA = in->w,
446                 oR = out->x, oG = out->y, oB = out->z, oA = out->w;
447             out->x = (oR - iR) < 0 ? 0 : oR - iR;
448             out->y = (oG - iG) < 0 ? 0 : oG - iG;
449             out->z = (oB - iB) < 0 ? 0 : oB - iB;
450             out->w = (oA - iA) < 0 ? 0 : oA - iA;
451         }
452         break;
453     case BLEND_STAMP:
454         ALOGE("Called unimplemented blend intrinsic BLEND_STAMP");
455         rsAssert(false);
456         break;
457     case BLEND_RED:
458         ALOGE("Called unimplemented blend intrinsic BLEND_RED");
459         rsAssert(false);
460         break;
461     case BLEND_GREEN:
462         ALOGE("Called unimplemented blend intrinsic BLEND_GREEN");
463         rsAssert(false);
464         break;
465     case BLEND_BLUE:
466         ALOGE("Called unimplemented blend intrinsic BLEND_BLUE");
467         rsAssert(false);
468         break;
469     case BLEND_HUE:
470         ALOGE("Called unimplemented blend intrinsic BLEND_HUE");
471         rsAssert(false);
472         break;
473     case BLEND_SATURATION:
474         ALOGE("Called unimplemented blend intrinsic BLEND_SATURATION");
475         rsAssert(false);
476         break;
477     case BLEND_COLOR:
478         ALOGE("Called unimplemented blend intrinsic BLEND_COLOR");
479         rsAssert(false);
480         break;
481     case BLEND_LUMINOSITY:
482         ALOGE("Called unimplemented blend intrinsic BLEND_LUMINOSITY");
483         rsAssert(false);
484         break;
485 
486     default:
487         ALOGE("Called unimplemented value %d", p->slot);
488         rsAssert(false);
489 
490     }
491 }
492 
493 
RsdCpuScriptIntrinsicBlend(RsdCpuReferenceImpl * ctx,const Script * s,const Element * e)494 RsdCpuScriptIntrinsicBlend::RsdCpuScriptIntrinsicBlend(RsdCpuReferenceImpl *ctx,
495                                                        const Script *s, const Element *e)
496             : RsdCpuScriptIntrinsic(ctx, s, e, RS_SCRIPT_INTRINSIC_ID_BLEND) {
497 
498     mRootPtr = &kernel;
499 }
500 
~RsdCpuScriptIntrinsicBlend()501 RsdCpuScriptIntrinsicBlend::~RsdCpuScriptIntrinsicBlend() {
502 }
503 
populateScript(Script * s)504 void RsdCpuScriptIntrinsicBlend::populateScript(Script *s) {
505     s->mHal.info.exportedVariableCount = 0;
506 }
507 
rsdIntrinsic_Blend(RsdCpuReferenceImpl * ctx,const Script * s,const Element * e)508 RsdCpuScriptImpl * rsdIntrinsic_Blend(RsdCpuReferenceImpl *ctx,
509                                       const Script *s, const Element *e) {
510     return new RsdCpuScriptIntrinsicBlend(ctx, s, e);
511 }
512 
513 
514 
515