1 /* libs/pixelflinger/codeflinger/blending.cpp
2 **
3 ** Copyright 2006, The Android Open Source Project
4 **
5 ** Licensed under the Apache License, Version 2.0 (the "License");
6 ** you may not use this file except in compliance with the License.
7 ** You may obtain a copy of the License at
8 **
9 **     http://www.apache.org/licenses/LICENSE-2.0
10 **
11 ** Unless required by applicable law or agreed to in writing, software
12 ** distributed under the License is distributed on an "AS IS" BASIS,
13 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 ** See the License for the specific language governing permissions and
15 ** limitations under the License.
16 */
17 
18 #include <assert.h>
19 #include <stdint.h>
20 #include <stdlib.h>
21 #include <stdio.h>
22 #include <sys/types.h>
23 
24 #include <cutils/log.h>
25 
26 #include "GGLAssembler.h"
27 
28 
29 namespace android {
30 
build_fog(component_t & temp,int component,Scratch & regs)31 void GGLAssembler::build_fog(
32                         component_t& temp,      // incomming fragment / output
33                         int component,
34                         Scratch& regs)
35 {
36    if (mInfo[component].fog) {
37         Scratch scratches(registerFile());
38         comment("fog");
39 
40         integer_t fragment(temp.reg, temp.h, temp.flags);
41         if (!(temp.flags & CORRUPTIBLE)) {
42             temp.reg = regs.obtain();
43             temp.flags |= CORRUPTIBLE;
44         }
45 
46         integer_t fogColor(scratches.obtain(), 8, CORRUPTIBLE);
47         LDRB(AL, fogColor.reg, mBuilderContext.Rctx,
48                 immed12_pre(GGL_OFFSETOF(state.fog.color[component])));
49 
50         integer_t factor(scratches.obtain(), 16, CORRUPTIBLE);
51         CONTEXT_LOAD(factor.reg, generated_vars.f);
52 
53         // clamp fog factor (TODO: see if there is a way to guarantee
54         // we won't overflow, when setting the iterators)
55         BIC(AL, 0, factor.reg, factor.reg, reg_imm(factor.reg, ASR, 31));
56         CMP(AL, factor.reg, imm( 0x10000 ));
57         MOV(HS, 0, factor.reg, imm( 0x10000 ));
58 
59         build_blendFOneMinusF(temp, factor, fragment, fogColor);
60     }
61 }
62 
// Emits the code that blends one component of the incoming fragment with
// the matching component of the framebuffer pixel.
//
//   temp      - in: the incoming fragment component; out: the blended result
//   pixel     - framebuffer pixel the destination component is extracted from
//   component - index of the component being processed
//   regs      - allocator used for the result register when 'temp' has none
//               yet or may not be overwritten
//
// Returns without emitting anything when blending is disabled for
// 'component'.
void GGLAssembler::build_blending(
                        component_t& temp,      // incoming fragment / output
                        const pixel_t& pixel,   // framebuffer
                        int component,
                        Scratch& regs)
{
   if (!mInfo[component].blend)
        return;

    // the alpha component uses its own pair of blend factors
    int fs = component==GGLFormat::ALPHA ? mBlendSrcA : mBlendSrc;
    int fd = component==GGLFormat::ALPHA ? mBlendDstA : mBlendDst;
    // for the alpha component, SRC_ALPHA_SATURATE is handled as ONE
    if (fs==GGL_SRC_ALPHA_SATURATE && component==GGLFormat::ALPHA)
        fs = GGL_ONE;
    // bitmask telling which operands the chosen factors require
    const int blending = blending_codes(fs, fd);
    if (!temp.size()) {
        // here, blending will produce something which doesn't depend on
        // that component (eg: GL_ZERO:GL_*), so the register has not been
        // allocated yet. Will never be used as a source.
        temp = component_t(regs.obtain(), CORRUPTIBLE);
    }

    // we are doing real blending...
    // fb:          extracted dst
    // fragment:    extracted src
    // temp:        component_t(fragment) and result

    // scoped register allocator
    Scratch scratches(registerFile());
    comment("blending");

    // we can optimize these cases a bit...
    // (1) saturation is not needed
    // (2) we can use only one multiply instead of 2
    // (3) we can reduce the register pressure
    //      R = S*f + D*(1-f) = (S-D)*f + D
    //      R = S*(1-f) + D*f = (D-S)*f + S

    // source factor is f, destination factor is 1-f
    const bool same_factor_opt1 =
        (fs==GGL_DST_COLOR && fd==GGL_ONE_MINUS_DST_COLOR) ||
        (fs==GGL_SRC_COLOR && fd==GGL_ONE_MINUS_SRC_COLOR) ||
        (fs==GGL_DST_ALPHA && fd==GGL_ONE_MINUS_DST_ALPHA) ||
        (fs==GGL_SRC_ALPHA && fd==GGL_ONE_MINUS_SRC_ALPHA);

    // source factor is 1-f, destination factor is f
    const bool same_factor_opt2 =
        (fs==GGL_ONE_MINUS_DST_COLOR && fd==GGL_DST_COLOR) ||
        (fs==GGL_ONE_MINUS_SRC_COLOR && fd==GGL_SRC_COLOR) ||
        (fs==GGL_ONE_MINUS_DST_ALPHA && fd==GGL_DST_ALPHA) ||
        (fs==GGL_ONE_MINUS_SRC_ALPHA && fd==GGL_SRC_ALPHA);


    // XXX: we could also optimize these cases:
    // R = S*f + D*f = (S+D)*f
    // R = S*(1-f) + D*(1-f) = (S+D)*(1-f)
    // R = S*D + D*S = 2*S*D


    // see if we need to extract 'component' from the destination (fb)
    integer_t fb;
    if (blending & (BLEND_DST|FACTOR_DST)) {
        fb.setTo(scratches.obtain(), 32);
        extract(fb, pixel, component);
        if (mDithering) {
            // XXX: maybe what we should do instead, is simply
            // expand fb -or- fragment to the larger of the two
            if (fb.size() < temp.size()) {
                // for now we expand 'fb' to min(fragment, 8)
                int new_size = temp.size() < 8 ? temp.size() : 8;
                expand(fb, fb, new_size);
            }
        }
    }


    // convert input fragment to integer_t
    // (when temp may be overwritten, shift it down in place so that its
    // low bit is bit 0)
    if (temp.l && (temp.flags & CORRUPTIBLE)) {
        MOV(AL, 0, temp.reg, reg_imm(temp.reg, LSR, temp.l));
        temp.h -= temp.l;
        temp.l = 0;
    }
    integer_t fragment(temp.reg, temp.size(), temp.flags);

    // if not done yet, convert input fragment to integer_t
    if (temp.l) {
        // here we know temp is not CORRUPTIBLE
        // so the shifted copy goes to a scratch register
        fragment.reg = scratches.obtain();
        MOV(AL, 0, fragment.reg, reg_imm(temp.reg, LSR, temp.l));
        fragment.flags |= CORRUPTIBLE;
    }

    if (!(temp.flags & CORRUPTIBLE)) {
        // temp is not corruptible, but since it's the destination it
        // will be modified, so we need to allocate a new register.
        // temp stays without CORRUPTIBLE until the end of this function,
        // and fragment loses the flag as well.
        temp.reg = regs.obtain();
        temp.flags &= ~CORRUPTIBLE;
        fragment.flags &= ~CORRUPTIBLE;
    }

    if ((blending & BLEND_SRC) && !same_factor_opt1) {
        // source (fragment) is needed for the blending stage
        // so it's not CORRUPTIBLE (unless we're doing same_factor_opt1)
        fragment.flags &= ~CORRUPTIBLE;
    }


    if (same_factor_opt1) {
        //  R = S*f + D*(1-f) = (S-D)*f + D
        integer_t factor;
        build_blend_factor(factor, fs,
                component, pixel, fragment, fb, scratches);
        // fb is always corruptible from this point
        fb.flags |= CORRUPTIBLE;
        build_blendFOneMinusF(temp, factor, fragment, fb);
    } else if (same_factor_opt2) {
        //  R = S*(1-f) + D*f = (D-S)*f + S
        integer_t factor;
        // fb is always corruptible here
        fb.flags |= CORRUPTIBLE;
        build_blend_factor(factor, fd,
                component, pixel, fragment, fb, scratches);
        build_blendOneMinusFF(temp, factor, fragment, fb);
    } else {
        // general case: source and destination factors are handled
        // independently
        integer_t src_factor;
        integer_t dst_factor;

        // if destination (fb) is not needed for the blending stage,
        // then it can be marked as CORRUPTIBLE
        if (!(blending & BLEND_DST)) {
            fb.flags |= CORRUPTIBLE;
        }

        // XXX: try to mark some registers as CORRUPTIBLE
        // in most case we could make those corruptible
        // when we're processing the last component
        // but not always, for instance
        //    when fragment is constant and not reloaded
        //    when fb is needed for logic-ops or masking
        //    when a register is aliased (for instance with mAlphaSource)

        // blend away...
        if (fs==GGL_ZERO) {
            if (fd==GGL_ZERO) {         // R = 0
                // already taken care of
            } else if (fd==GGL_ONE) {   // R = D
                // already taken care of
            } else {                    // R = D*fd
                // compute fd
                build_blend_factor(dst_factor, fd,
                        component, pixel, fragment, fb, scratches);
                mul_factor(temp, fb, dst_factor);
            }
        } else if (fs==GGL_ONE) {
            if (fd==GGL_ZERO) {         // R = S
                // NOP, taken care of
            } else if (fd==GGL_ONE) {   // R = S + D
                component_add(temp, fb, fragment); // args order matters
                component_sat(temp);
            } else {                    // R = S + D*fd
                // compute fd
                build_blend_factor(dst_factor, fd,
                        component, pixel, fragment, fb, scratches);
                mul_factor_add(temp, fb, dst_factor, component_t(fragment));
                component_sat(temp);
            }
        } else {
            // compute fs
            build_blend_factor(src_factor, fs,
                    component, pixel, fragment, fb, scratches);
            if (fd==GGL_ZERO) {         // R = S*fs
                mul_factor(temp, fragment, src_factor);
            } else if (fd==GGL_ONE) {   // R = S*fs + D
                mul_factor_add(temp, fragment, src_factor, component_t(fb));
                component_sat(temp);
            } else {                    // R = S*fs + D*fd
                mul_factor(temp, fragment, src_factor);
                // the source factor is no longer needed, give its register
                // back if it came from our scratch allocator
                if (scratches.isUsed(src_factor.reg))
                    scratches.recycle(src_factor.reg);
                // compute fd
                build_blend_factor(dst_factor, fd,
                        component, pixel, fragment, fb, scratches);
                // temp is both the addend (S*fs) and the destination
                mul_factor_add(temp, fb, dst_factor, temp);
                // both flags are necessarily false in this branch (it is
                // the 'else' of the two same_factor tests above), so the
                // result is always saturated
                if (!same_factor_opt1 && !same_factor_opt2) {
                    component_sat(temp);
                }
            }
        }
    }

    // now we can be corrupted (it's the dest)
    temp.flags |= CORRUPTIBLE;
}
253 
// Emits the code that computes the blend factor selected by 'f' and
// describes the resulting register in 'factor'.
//
//   factor    - out: register/size/flags of the computed factor
//   f         - blend factor selector (one of the GGL_* cases handled below;
//               any other value leaves 'factor' untouched apart from its
//               CORRUPTIBLE flag)
//   component - index of the component being blended
//   dst_pixel - framebuffer pixel, used to extract the destination alpha
//   fragment  - source component; its register may be taken over for the
//               factor, in which case its CORRUPTIBLE flag is cleared
//   fb        - destination component; same register-reuse rule as fragment
//   scratches - allocator used when neither register above can be reused
//
// The function works in four steps: pick a register for the factor, compute
// the base value, turn it into (1 - value) for the ONE_MINUS_* selectors and
// finally reduce it to at most 8 bits.
void GGLAssembler::build_blend_factor(
        integer_t& factor, int f, int component,
        const pixel_t& dst_pixel,
        integer_t& fragment,
        integer_t& fb,
        Scratch& scratches)
{
    // unless replaced by mAlphaSource below, the source alpha is the
    // fragment itself
    integer_t src_alpha(fragment);

    // src_factor/dst_factor won't be used after blending,
    // so it's fine to mark them as CORRUPTIBLE (if not aliased)
    factor.flags |= CORRUPTIBLE;

    // step 1: choose the register holding the factor
    switch(f) {
    case GGL_ONE_MINUS_SRC_ALPHA:
    case GGL_SRC_ALPHA:
        if (component==GGLFormat::ALPHA && !isAlphaSourceNeeded()) {
            // we're processing alpha, so we already have
            // src-alpha in fragment, and we need src-alpha just this time.
        } else {
           // alpha-src will be needed for other components
            if (!mBlendFactorCached || mBlendFactorCached==f) {
                // the factor lives in the mAlphaSource register, which must
                // not be treated as corruptible
                src_alpha = mAlphaSource;
                factor = mAlphaSource;
                factor.flags &= ~CORRUPTIBLE;
                // we already computed the blend factor before, nothing to do.
                if (mBlendFactorCached)
                    return;
                // this is the first time, make sure to compute the blend
                // factor properly.
                mBlendFactorCached = f;
                break;
            } else {
                // we have a cached alpha blend factor, but we want another one,
                // this should really not happen because by construction,
                // we cannot have BOTH source and destination
                // blend factors use ALPHA *and* ONE_MINUS_ALPHA (because
                // the blending stage uses the f/(1-f) optimization

                // for completeness, we handle this case though. Since there
                // are only 2 choices, this means we want "the other one"
                // (1-factor)
                // the cached register is rewritten in place with
                // (1<<factor.s) - factor
                factor = mAlphaSource;
                factor.flags &= ~CORRUPTIBLE;
                RSB(AL, 0, factor.reg, factor.reg, imm((1<<factor.s)));
                mBlendFactorCached = f;
                return;
            }
        }
        // fall-through...
    case GGL_ONE_MINUS_DST_COLOR:
    case GGL_DST_COLOR:
    case GGL_ONE_MINUS_SRC_COLOR:
    case GGL_SRC_COLOR:
    case GGL_ONE_MINUS_DST_ALPHA:
    case GGL_DST_ALPHA:
    case GGL_SRC_ALPHA_SATURATE:
        // help us find out what register we can use for the blend-factor
        // CORRUPTIBLE registers are chosen first, or a new one is allocated.
        // A register taken over this way loses its CORRUPTIBLE flag so that
        // it is not handed out a second time.
        if (fragment.flags & CORRUPTIBLE) {
            factor.setTo(fragment.reg, 32, CORRUPTIBLE);
            fragment.flags &= ~CORRUPTIBLE;
        } else if (fb.flags & CORRUPTIBLE) {
            factor.setTo(fb.reg, 32, CORRUPTIBLE);
            fb.flags &= ~CORRUPTIBLE;
        } else {
            factor.setTo(scratches.obtain(), 32, CORRUPTIBLE);
        }
        break;
    }

    // XXX: doesn't work if size==1

    // step 2: compute the base value. Each case emits x + (x >> (s-1)) for
    // an s-bit input x, so that the largest input (2^s - 1) yields exactly
    // 2^s.
    switch(f) {
    case GGL_ONE_MINUS_DST_COLOR:
    case GGL_DST_COLOR:
        factor.s = fb.s;
        ADD(AL, 0, factor.reg, fb.reg, reg_imm(fb.reg, LSR, fb.s-1));
        break;
    case GGL_ONE_MINUS_SRC_COLOR:
    case GGL_SRC_COLOR:
        factor.s = fragment.s;
        ADD(AL, 0, factor.reg, fragment.reg,
            reg_imm(fragment.reg, LSR, fragment.s-1));
        break;
    case GGL_ONE_MINUS_SRC_ALPHA:
    case GGL_SRC_ALPHA:
        factor.s = src_alpha.s;
        ADD(AL, 0, factor.reg, src_alpha.reg,
                reg_imm(src_alpha.reg, LSR, src_alpha.s-1));
        break;
    case GGL_ONE_MINUS_DST_ALPHA:
    case GGL_DST_ALPHA:
        // XXX: should be precomputed
        extract(factor, dst_pixel, GGLFormat::ALPHA);
        ADD(AL, 0, factor.reg, factor.reg,
                reg_imm(factor.reg, LSR, factor.s-1));
        break;
    case GGL_SRC_ALPHA_SATURATE:
        // XXX: should be precomputed
        // XXX: f = min(As, 1-Ad)
        // btw, we're guaranteed that Ad's size is <= 8, because
        // it's extracted from the framebuffer
        // NOTE: nothing is emitted here, so the register chosen above is
        // not given a value for this selector.
        break;
    }

    // step 3: the ONE_MINUS_* selectors use (1<<factor.s) - factor
    switch(f) {
    case GGL_ONE_MINUS_DST_COLOR:
    case GGL_ONE_MINUS_SRC_COLOR:
    case GGL_ONE_MINUS_DST_ALPHA:
    case GGL_ONE_MINUS_SRC_ALPHA:
        RSB(AL, 0, factor.reg, factor.reg, imm((1<<factor.s)));
    }

    // step 4:
    // don't need more than 8-bits for the blend factor
    // and this will prevent overflows in the multiplies later
    if (factor.s > 8) {
        MOV(AL, 0, factor.reg, reg_imm(factor.reg, LSR, factor.s-8));
        factor.s = 8;
    }
}
375 
blending_codes(int fs,int fd)376 int GGLAssembler::blending_codes(int fs, int fd)
377 {
378     int blending = 0;
379     switch(fs) {
380     case GGL_ONE:
381         blending |= BLEND_SRC;
382         break;
383 
384     case GGL_ONE_MINUS_DST_COLOR:
385     case GGL_DST_COLOR:
386         blending |= FACTOR_DST|BLEND_SRC;
387         break;
388     case GGL_ONE_MINUS_DST_ALPHA:
389     case GGL_DST_ALPHA:
390         // no need to extract 'component' from the destination
391         // for the blend factor, because we need ALPHA only.
392         blending |= BLEND_SRC;
393         break;
394 
395     case GGL_ONE_MINUS_SRC_COLOR:
396     case GGL_SRC_COLOR:
397         blending |= FACTOR_SRC|BLEND_SRC;
398         break;
399     case GGL_ONE_MINUS_SRC_ALPHA:
400     case GGL_SRC_ALPHA:
401     case GGL_SRC_ALPHA_SATURATE:
402         blending |= FACTOR_SRC|BLEND_SRC;
403         break;
404     }
405     switch(fd) {
406     case GGL_ONE:
407         blending |= BLEND_DST;
408         break;
409 
410     case GGL_ONE_MINUS_DST_COLOR:
411     case GGL_DST_COLOR:
412         blending |= FACTOR_DST|BLEND_DST;
413         break;
414     case GGL_ONE_MINUS_DST_ALPHA:
415     case GGL_DST_ALPHA:
416         blending |= FACTOR_DST|BLEND_DST;
417         break;
418 
419     case GGL_ONE_MINUS_SRC_COLOR:
420     case GGL_SRC_COLOR:
421         blending |= FACTOR_SRC|BLEND_DST;
422         break;
423     case GGL_ONE_MINUS_SRC_ALPHA:
424     case GGL_SRC_ALPHA:
425         // no need to extract 'component' from the source
426         // for the blend factor, because we need ALPHA only.
427         blending |= BLEND_DST;
428         break;
429     }
430     return blending;
431 }
432 
433 // ---------------------------------------------------------------------------
434 
build_blendFOneMinusF(component_t & temp,const integer_t & factor,const integer_t & fragment,const integer_t & fb)435 void GGLAssembler::build_blendFOneMinusF(
436         component_t& temp,
437         const integer_t& factor,
438         const integer_t& fragment,
439         const integer_t& fb)
440 {
441     //  R = S*f + D*(1-f) = (S-D)*f + D
442     Scratch scratches(registerFile());
443     // compute S-D
444     integer_t diff(fragment.flags & CORRUPTIBLE ?
445             fragment.reg : scratches.obtain(), fb.size(), CORRUPTIBLE);
446     const int shift = fragment.size() - fb.size();
447     if (shift>0)        RSB(AL, 0, diff.reg, fb.reg, reg_imm(fragment.reg, LSR, shift));
448     else if (shift<0)   RSB(AL, 0, diff.reg, fb.reg, reg_imm(fragment.reg, LSL,-shift));
449     else                RSB(AL, 0, diff.reg, fb.reg, fragment.reg);
450     mul_factor_add(temp, diff, factor, component_t(fb));
451 }
452 
build_blendOneMinusFF(component_t & temp,const integer_t & factor,const integer_t & fragment,const integer_t & fb)453 void GGLAssembler::build_blendOneMinusFF(
454         component_t& temp,
455         const integer_t& factor,
456         const integer_t& fragment,
457         const integer_t& fb)
458 {
459     //  R = S*f + D*(1-f) = (S-D)*f + D
460     Scratch scratches(registerFile());
461     // compute D-S
462     integer_t diff(fb.flags & CORRUPTIBLE ?
463             fb.reg : scratches.obtain(), fb.size(), CORRUPTIBLE);
464     const int shift = fragment.size() - fb.size();
465     if (shift>0)        SUB(AL, 0, diff.reg, fb.reg, reg_imm(fragment.reg, LSR, shift));
466     else if (shift<0)   SUB(AL, 0, diff.reg, fb.reg, reg_imm(fragment.reg, LSL,-shift));
467     else                SUB(AL, 0, diff.reg, fb.reg, fragment.reg);
468     mul_factor_add(temp, diff, factor, component_t(fragment));
469 }
470 
471 // ---------------------------------------------------------------------------
472 
mul_factor(component_t & d,const integer_t & v,const integer_t & f)473 void GGLAssembler::mul_factor(  component_t& d,
474                                 const integer_t& v,
475                                 const integer_t& f)
476 {
477     int vs = v.size();
478     int fs = f.size();
479     int ms = vs+fs;
480 
481     // XXX: we could have special cases for 1 bit mul
482 
483     // all this code below to use the best multiply instruction
484     // wrt the parameters size. We take advantage of the fact
485     // that the 16-bits multiplies allow a 16-bit shift
486     // The trick is that we just make sure that we have at least 8-bits
487     // per component (which is enough for a 8 bits display).
488 
489     int xy;
490     int vshift = 0;
491     int fshift = 0;
492     int smulw = 0;
493 
494     if (vs<16) {
495         if (fs<16) {
496             xy = xyBB;
497         } else if (GGL_BETWEEN(fs, 24, 31)) {
498             ms -= 16;
499             xy = xyTB;
500         } else {
501             // eg: 15 * 18  ->  15 * 15
502             fshift = fs - 15;
503             ms -= fshift;
504             xy = xyBB;
505         }
506     } else if (GGL_BETWEEN(vs, 24, 31)) {
507         if (fs<16) {
508             ms -= 16;
509             xy = xyTB;
510         } else if (GGL_BETWEEN(fs, 24, 31)) {
511             ms -= 32;
512             xy = xyTT;
513         } else {
514             // eg: 24 * 18  ->  8 * 18
515             fshift = fs - 15;
516             ms -= 16 + fshift;
517             xy = xyTB;
518         }
519     } else {
520         if (fs<16) {
521             // eg: 18 * 15  ->  15 * 15
522             vshift = vs - 15;
523             ms -= vshift;
524             xy = xyBB;
525         } else if (GGL_BETWEEN(fs, 24, 31)) {
526             // eg: 18 * 24  ->  15 * 8
527             vshift = vs - 15;
528             ms -= 16 + vshift;
529             xy = xyBT;
530         } else {
531             // eg: 18 * 18  ->  (15 * 18)>>16
532             fshift = fs - 15;
533             ms -= 16 + fshift;
534             xy = yB;    //XXX SMULWB
535             smulw = 1;
536         }
537     }
538 
539     ALOGE_IF(ms>=32, "mul_factor overflow vs=%d, fs=%d", vs, fs);
540 
541     int vreg = v.reg;
542     int freg = f.reg;
543     if (vshift) {
544         MOV(AL, 0, d.reg, reg_imm(vreg, LSR, vshift));
545         vreg = d.reg;
546     }
547     if (fshift) {
548         MOV(AL, 0, d.reg, reg_imm(vreg, LSR, fshift));
549         freg = d.reg;
550     }
551     if (smulw)  SMULW(AL, xy, d.reg, vreg, freg);
552     else        SMUL(AL, xy, d.reg, vreg, freg);
553 
554 
555     d.h = ms;
556     if (mDithering) {
557         d.l = 0;
558     } else {
559         d.l = fs;
560         d.flags |= CLEAR_LO;
561     }
562 }
563 
mul_factor_add(component_t & d,const integer_t & v,const integer_t & f,const component_t & a)564 void GGLAssembler::mul_factor_add(  component_t& d,
565                                     const integer_t& v,
566                                     const integer_t& f,
567                                     const component_t& a)
568 {
569     // XXX: we could have special cases for 1 bit mul
570     Scratch scratches(registerFile());
571 
572     int vs = v.size();
573     int fs = f.size();
574     int as = a.h;
575     int ms = vs+fs;
576 
577     ALOGE_IF(ms>=32, "mul_factor_add overflow vs=%d, fs=%d, as=%d", vs, fs, as);
578 
579     integer_t add(a.reg, a.h, a.flags);
580 
581     // 'a' is a component_t but it is guaranteed to have
582     // its high bits set to 0. However in the dithering case,
583     // we can't get away with truncating the potentially bad bits
584     // so extraction is needed.
585 
586    if ((mDithering) && (a.size() < ms)) {
587         // we need to expand a
588         if (!(a.flags & CORRUPTIBLE)) {
589             // ... but it's not corruptible, so we need to pick a
590             // temporary register.
591             // Try to uses the destination register first (it's likely
592             // to be usable, unless it aliases an input).
593             if (d.reg!=a.reg && d.reg!=v.reg && d.reg!=f.reg) {
594                 add.reg = d.reg;
595             } else {
596                 add.reg = scratches.obtain();
597             }
598         }
599         expand(add, a, ms); // extracts and expands
600         as = ms;
601     }
602 
603     if (ms == as) {
604         if (vs<16 && fs<16) SMLABB(AL, d.reg, v.reg, f.reg, add.reg);
605         else                MLA(AL, 0, d.reg, v.reg, f.reg, add.reg);
606     } else {
607         int temp = d.reg;
608         if (temp == add.reg) {
609             // the mul will modify add.reg, we need an intermediary reg
610             if (v.flags & CORRUPTIBLE)      temp = v.reg;
611             else if (f.flags & CORRUPTIBLE) temp = f.reg;
612             else                            temp = scratches.obtain();
613         }
614 
615         if (vs<16 && fs<16) SMULBB(AL, temp, v.reg, f.reg);
616         else                MUL(AL, 0, temp, v.reg, f.reg);
617 
618         if (ms>as) {
619             ADD(AL, 0, d.reg, temp, reg_imm(add.reg, LSL, ms-as));
620         } else if (ms<as) {
621             // not sure if we should expand the mul instead?
622             ADD(AL, 0, d.reg, temp, reg_imm(add.reg, LSR, as-ms));
623         }
624     }
625 
626     d.h = ms;
627     if (mDithering) {
628         d.l = a.l;
629     } else {
630         d.l = fs>a.l ? fs : a.l;
631         d.flags |= CLEAR_LO;
632     }
633 }
634 
component_add(component_t & d,const integer_t & dst,const integer_t & src)635 void GGLAssembler::component_add(component_t& d,
636         const integer_t& dst, const integer_t& src)
637 {
638     // here we're guaranteed that fragment.size() >= fb.size()
639     const int shift = src.size() - dst.size();
640     if (!shift) {
641         ADD(AL, 0, d.reg, src.reg, dst.reg);
642     } else {
643         ADD(AL, 0, d.reg, src.reg, reg_imm(dst.reg, LSL, shift));
644     }
645 
646     d.h = src.size();
647     if (mDithering) {
648         d.l = 0;
649     } else {
650         d.l = shift;
651         d.flags |= CLEAR_LO;
652     }
653 }
654 
component_sat(const component_t & v)655 void GGLAssembler::component_sat(const component_t& v)
656 {
657     const int one = ((1<<v.size())-1)<<v.l;
658     CMP(AL, v.reg, imm( 1<<v.h ));
659     if (isValidImmediate(one)) {
660         MOV(HS, 0, v.reg, imm( one ));
661     } else if (isValidImmediate(~one)) {
662         MVN(HS, 0, v.reg, imm( ~one ));
663     } else {
664         MOV(HS, 0, v.reg, imm( 1<<v.h ));
665         SUB(HS, 0, v.reg, v.reg, imm( 1<<v.l ));
666     }
667 }
668 
669 // ----------------------------------------------------------------------------
670 
671 }; // namespace android
672 
673