1 /* libs/pixelflinger/codeflinger/blending.cpp
2 **
3 ** Copyright 2006, The Android Open Source Project
4 **
5 ** Licensed under the Apache License, Version 2.0 (the "License");
6 ** you may not use this file except in compliance with the License.
7 ** You may obtain a copy of the License at
8 **
9 ** http://www.apache.org/licenses/LICENSE-2.0
10 **
11 ** Unless required by applicable law or agreed to in writing, software
12 ** distributed under the License is distributed on an "AS IS" BASIS,
13 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 ** See the License for the specific language governing permissions and
15 ** limitations under the License.
16 */
17
18 #define LOG_TAG "pixelflinger-code"
19
20 #include <assert.h>
21 #include <stdint.h>
22 #include <stdio.h>
23 #include <stdlib.h>
24 #include <sys/types.h>
25
26 #include <log/log.h>
27
28 #include "GGLAssembler.h"
29
30 namespace android {
31
build_fog(component_t & temp,int component,Scratch & regs)32 void GGLAssembler::build_fog(
33 component_t& temp, // incomming fragment / output
34 int component,
35 Scratch& regs)
36 {
37 if (mInfo[component].fog) {
38 Scratch scratches(registerFile());
39 comment("fog");
40
41 integer_t fragment(temp.reg, temp.h, temp.flags);
42 if (!(temp.flags & CORRUPTIBLE)) {
43 temp.reg = regs.obtain();
44 temp.flags |= CORRUPTIBLE;
45 }
46
47 integer_t fogColor(scratches.obtain(), 8, CORRUPTIBLE);
48 LDRB(AL, fogColor.reg, mBuilderContext.Rctx,
49 immed12_pre(GGL_OFFSETOF(state.fog.color[component])));
50
51 integer_t factor(scratches.obtain(), 16, CORRUPTIBLE);
52 CONTEXT_LOAD(factor.reg, generated_vars.f);
53
54 // clamp fog factor (TODO: see if there is a way to guarantee
55 // we won't overflow, when setting the iterators)
56 BIC(AL, 0, factor.reg, factor.reg, reg_imm(factor.reg, ASR, 31));
57 CMP(AL, factor.reg, imm( 0x10000 ));
58 MOV(HS, 0, factor.reg, imm( 0x10000 ));
59
60 build_blendFOneMinusF(temp, factor, fragment, fogColor);
61 }
62 }
63
build_blending(component_t & temp,const pixel_t & pixel,int component,Scratch & regs)64 void GGLAssembler::build_blending(
65 component_t& temp, // incomming fragment / output
66 const pixel_t& pixel, // framebuffer
67 int component,
68 Scratch& regs)
69 {
70 if (!mInfo[component].blend)
71 return;
72
73 int fs = component==GGLFormat::ALPHA ? mBlendSrcA : mBlendSrc;
74 int fd = component==GGLFormat::ALPHA ? mBlendDstA : mBlendDst;
75 if (fs==GGL_SRC_ALPHA_SATURATE && component==GGLFormat::ALPHA)
76 fs = GGL_ONE;
77 const int blending = blending_codes(fs, fd);
78 if (!temp.size()) {
79 // here, blending will produce something which doesn't depend on
80 // that component (eg: GL_ZERO:GL_*), so the register has not been
81 // allocated yet. Will never be used as a source.
82 temp = component_t(regs.obtain(), CORRUPTIBLE);
83 }
84
85 // we are doing real blending...
86 // fb: extracted dst
87 // fragment: extracted src
88 // temp: component_t(fragment) and result
89
90 // scoped register allocator
91 Scratch scratches(registerFile());
92 comment("blending");
93
94 // we can optimize these cases a bit...
95 // (1) saturation is not needed
96 // (2) we can use only one multiply instead of 2
97 // (3) we can reduce the register pressure
98 // R = S*f + D*(1-f) = (S-D)*f + D
99 // R = S*(1-f) + D*f = (D-S)*f + S
100
101 const bool same_factor_opt1 =
102 (fs==GGL_DST_COLOR && fd==GGL_ONE_MINUS_DST_COLOR) ||
103 (fs==GGL_SRC_COLOR && fd==GGL_ONE_MINUS_SRC_COLOR) ||
104 (fs==GGL_DST_ALPHA && fd==GGL_ONE_MINUS_DST_ALPHA) ||
105 (fs==GGL_SRC_ALPHA && fd==GGL_ONE_MINUS_SRC_ALPHA);
106
107 const bool same_factor_opt2 =
108 (fs==GGL_ONE_MINUS_DST_COLOR && fd==GGL_DST_COLOR) ||
109 (fs==GGL_ONE_MINUS_SRC_COLOR && fd==GGL_SRC_COLOR) ||
110 (fs==GGL_ONE_MINUS_DST_ALPHA && fd==GGL_DST_ALPHA) ||
111 (fs==GGL_ONE_MINUS_SRC_ALPHA && fd==GGL_SRC_ALPHA);
112
113
114 // XXX: we could also optimize these cases:
115 // R = S*f + D*f = (S+D)*f
116 // R = S*(1-f) + D*(1-f) = (S+D)*(1-f)
117 // R = S*D + D*S = 2*S*D
118
119
120 // see if we need to extract 'component' from the destination (fb)
121 integer_t fb;
122 if (blending & (BLEND_DST|FACTOR_DST)) {
123 fb.setTo(scratches.obtain(), 32);
124 extract(fb, pixel, component);
125 if (mDithering) {
126 // XXX: maybe what we should do instead, is simply
127 // expand fb -or- fragment to the larger of the two
128 if (fb.size() < temp.size()) {
129 // for now we expand 'fb' to min(fragment, 8)
130 int new_size = temp.size() < 8 ? temp.size() : 8;
131 expand(fb, fb, new_size);
132 }
133 }
134 }
135
136
137 // convert input fragment to integer_t
138 if (temp.l && (temp.flags & CORRUPTIBLE)) {
139 MOV(AL, 0, temp.reg, reg_imm(temp.reg, LSR, temp.l));
140 temp.h -= temp.l;
141 temp.l = 0;
142 }
143 integer_t fragment(temp.reg, temp.size(), temp.flags);
144
145 // if not done yet, convert input fragment to integer_t
146 if (temp.l) {
147 // here we know temp is not CORRUPTIBLE
148 fragment.reg = scratches.obtain();
149 MOV(AL, 0, fragment.reg, reg_imm(temp.reg, LSR, temp.l));
150 fragment.flags |= CORRUPTIBLE;
151 }
152
153 if (!(temp.flags & CORRUPTIBLE)) {
154 // temp is not corruptible, but since it's the destination it
155 // will be modified, so we need to allocate a new register.
156 temp.reg = regs.obtain();
157 temp.flags &= ~CORRUPTIBLE;
158 fragment.flags &= ~CORRUPTIBLE;
159 }
160
161 if ((blending & BLEND_SRC) && !same_factor_opt1) {
162 // source (fragment) is needed for the blending stage
163 // so it's not CORRUPTIBLE (unless we're doing same_factor_opt1)
164 fragment.flags &= ~CORRUPTIBLE;
165 }
166
167
168 if (same_factor_opt1) {
169 // R = S*f + D*(1-f) = (S-D)*f + D
170 integer_t factor;
171 build_blend_factor(factor, fs,
172 component, pixel, fragment, fb, scratches);
173 // fb is always corruptible from this point
174 fb.flags |= CORRUPTIBLE;
175 build_blendFOneMinusF(temp, factor, fragment, fb);
176 } else if (same_factor_opt2) {
177 // R = S*(1-f) + D*f = (D-S)*f + S
178 integer_t factor;
179 // fb is always corrruptible here
180 fb.flags |= CORRUPTIBLE;
181 build_blend_factor(factor, fd,
182 component, pixel, fragment, fb, scratches);
183 build_blendOneMinusFF(temp, factor, fragment, fb);
184 } else {
185 integer_t src_factor;
186 integer_t dst_factor;
187
188 // if destination (fb) is not needed for the blending stage,
189 // then it can be marked as CORRUPTIBLE
190 if (!(blending & BLEND_DST)) {
191 fb.flags |= CORRUPTIBLE;
192 }
193
194 // XXX: try to mark some registers as CORRUPTIBLE
195 // in most case we could make those corruptible
196 // when we're processing the last component
197 // but not always, for instance
198 // when fragment is constant and not reloaded
199 // when fb is needed for logic-ops or masking
200 // when a register is aliased (for instance with mAlphaSource)
201
202 // blend away...
203 if (fs==GGL_ZERO) {
204 if (fd==GGL_ZERO) { // R = 0
205 // already taken care of
206 } else if (fd==GGL_ONE) { // R = D
207 // already taken care of
208 } else { // R = D*fd
209 // compute fd
210 build_blend_factor(dst_factor, fd,
211 component, pixel, fragment, fb, scratches);
212 mul_factor(temp, fb, dst_factor);
213 }
214 } else if (fs==GGL_ONE) {
215 if (fd==GGL_ZERO) { // R = S
216 // NOP, taken care of
217 } else if (fd==GGL_ONE) { // R = S + D
218 component_add(temp, fb, fragment); // args order matters
219 component_sat(temp);
220 } else { // R = S + D*fd
221 // compute fd
222 build_blend_factor(dst_factor, fd,
223 component, pixel, fragment, fb, scratches);
224 mul_factor_add(temp, fb, dst_factor, component_t(fragment));
225 component_sat(temp);
226 }
227 } else {
228 // compute fs
229 build_blend_factor(src_factor, fs,
230 component, pixel, fragment, fb, scratches);
231 if (fd==GGL_ZERO) { // R = S*fs
232 mul_factor(temp, fragment, src_factor);
233 } else if (fd==GGL_ONE) { // R = S*fs + D
234 mul_factor_add(temp, fragment, src_factor, component_t(fb));
235 component_sat(temp);
236 } else { // R = S*fs + D*fd
237 mul_factor(temp, fragment, src_factor);
238 if (scratches.isUsed(src_factor.reg))
239 scratches.recycle(src_factor.reg);
240 // compute fd
241 build_blend_factor(dst_factor, fd,
242 component, pixel, fragment, fb, scratches);
243 mul_factor_add(temp, fb, dst_factor, temp);
244 if (!same_factor_opt1 && !same_factor_opt2) {
245 component_sat(temp);
246 }
247 }
248 }
249 }
250
251 // now we can be corrupted (it's the dest)
252 temp.flags |= CORRUPTIBLE;
253 }
254
build_blend_factor(integer_t & factor,int f,int component,const pixel_t & dst_pixel,integer_t & fragment,integer_t & fb,Scratch & scratches)255 void GGLAssembler::build_blend_factor(
256 integer_t& factor, int f, int component,
257 const pixel_t& dst_pixel,
258 integer_t& fragment,
259 integer_t& fb,
260 Scratch& scratches)
261 {
262 integer_t src_alpha(fragment);
263
264 // src_factor/dst_factor won't be used after blending,
265 // so it's fine to mark them as CORRUPTIBLE (if not aliased)
266 factor.flags |= CORRUPTIBLE;
267
268 switch(f) {
269 case GGL_ONE_MINUS_SRC_ALPHA:
270 case GGL_SRC_ALPHA:
271 if (component==GGLFormat::ALPHA && !isAlphaSourceNeeded()) {
272 // we're processing alpha, so we already have
273 // src-alpha in fragment, and we need src-alpha just this time.
274 } else {
275 // alpha-src will be needed for other components
276 if (!mBlendFactorCached || mBlendFactorCached==f) {
277 src_alpha = mAlphaSource;
278 factor = mAlphaSource;
279 factor.flags &= ~CORRUPTIBLE;
280 // we already computed the blend factor before, nothing to do.
281 if (mBlendFactorCached)
282 return;
283 // this is the first time, make sure to compute the blend
284 // factor properly.
285 mBlendFactorCached = f;
286 break;
287 } else {
288 // we have a cached alpha blend factor, but we want another one,
289 // this should really not happen because by construction,
290 // we cannot have BOTH source and destination
291 // blend factors use ALPHA *and* ONE_MINUS_ALPHA (because
292 // the blending stage uses the f/(1-f) optimization
293
294 // for completeness, we handle this case though. Since there
295 // are only 2 choices, this meens we want "the other one"
296 // (1-factor)
297 factor = mAlphaSource;
298 factor.flags &= ~CORRUPTIBLE;
299 RSB(AL, 0, factor.reg, factor.reg, imm((1<<factor.s)));
300 mBlendFactorCached = f;
301 return;
302 }
303 }
304 // fall-through...
305 case GGL_ONE_MINUS_DST_COLOR:
306 case GGL_DST_COLOR:
307 case GGL_ONE_MINUS_SRC_COLOR:
308 case GGL_SRC_COLOR:
309 case GGL_ONE_MINUS_DST_ALPHA:
310 case GGL_DST_ALPHA:
311 case GGL_SRC_ALPHA_SATURATE:
312 // help us find out what register we can use for the blend-factor
313 // CORRUPTIBLE registers are chosen first, or a new one is allocated.
314 if (fragment.flags & CORRUPTIBLE) {
315 factor.setTo(fragment.reg, 32, CORRUPTIBLE);
316 fragment.flags &= ~CORRUPTIBLE;
317 } else if (fb.flags & CORRUPTIBLE) {
318 factor.setTo(fb.reg, 32, CORRUPTIBLE);
319 fb.flags &= ~CORRUPTIBLE;
320 } else {
321 factor.setTo(scratches.obtain(), 32, CORRUPTIBLE);
322 }
323 break;
324 }
325
326 // XXX: doesn't work if size==1
327
328 switch(f) {
329 case GGL_ONE_MINUS_DST_COLOR:
330 case GGL_DST_COLOR:
331 factor.s = fb.s;
332 ADD(AL, 0, factor.reg, fb.reg, reg_imm(fb.reg, LSR, fb.s-1));
333 break;
334 case GGL_ONE_MINUS_SRC_COLOR:
335 case GGL_SRC_COLOR:
336 factor.s = fragment.s;
337 ADD(AL, 0, factor.reg, fragment.reg,
338 reg_imm(fragment.reg, LSR, fragment.s-1));
339 break;
340 case GGL_ONE_MINUS_SRC_ALPHA:
341 case GGL_SRC_ALPHA:
342 factor.s = src_alpha.s;
343 ADD(AL, 0, factor.reg, src_alpha.reg,
344 reg_imm(src_alpha.reg, LSR, src_alpha.s-1));
345 break;
346 case GGL_ONE_MINUS_DST_ALPHA:
347 case GGL_DST_ALPHA:
348 // XXX: should be precomputed
349 extract(factor, dst_pixel, GGLFormat::ALPHA);
350 ADD(AL, 0, factor.reg, factor.reg,
351 reg_imm(factor.reg, LSR, factor.s-1));
352 break;
353 case GGL_SRC_ALPHA_SATURATE:
354 // XXX: should be precomputed
355 // XXX: f = min(As, 1-Ad)
356 // btw, we're guaranteed that Ad's size is <= 8, because
357 // it's extracted from the framebuffer
358 break;
359 }
360
361 switch(f) {
362 case GGL_ONE_MINUS_DST_COLOR:
363 case GGL_ONE_MINUS_SRC_COLOR:
364 case GGL_ONE_MINUS_DST_ALPHA:
365 case GGL_ONE_MINUS_SRC_ALPHA:
366 RSB(AL, 0, factor.reg, factor.reg, imm((1<<factor.s)));
367 }
368
369 // don't need more than 8-bits for the blend factor
370 // and this will prevent overflows in the multiplies later
371 if (factor.s > 8) {
372 MOV(AL, 0, factor.reg, reg_imm(factor.reg, LSR, factor.s-8));
373 factor.s = 8;
374 }
375 }
376
blending_codes(int fs,int fd)377 int GGLAssembler::blending_codes(int fs, int fd)
378 {
379 int blending = 0;
380 switch(fs) {
381 case GGL_ONE:
382 blending |= BLEND_SRC;
383 break;
384
385 case GGL_ONE_MINUS_DST_COLOR:
386 case GGL_DST_COLOR:
387 blending |= FACTOR_DST|BLEND_SRC;
388 break;
389 case GGL_ONE_MINUS_DST_ALPHA:
390 case GGL_DST_ALPHA:
391 // no need to extract 'component' from the destination
392 // for the blend factor, because we need ALPHA only.
393 blending |= BLEND_SRC;
394 break;
395
396 case GGL_ONE_MINUS_SRC_COLOR:
397 case GGL_SRC_COLOR:
398 blending |= FACTOR_SRC|BLEND_SRC;
399 break;
400 case GGL_ONE_MINUS_SRC_ALPHA:
401 case GGL_SRC_ALPHA:
402 case GGL_SRC_ALPHA_SATURATE:
403 blending |= FACTOR_SRC|BLEND_SRC;
404 break;
405 }
406 switch(fd) {
407 case GGL_ONE:
408 blending |= BLEND_DST;
409 break;
410
411 case GGL_ONE_MINUS_DST_COLOR:
412 case GGL_DST_COLOR:
413 blending |= FACTOR_DST|BLEND_DST;
414 break;
415 case GGL_ONE_MINUS_DST_ALPHA:
416 case GGL_DST_ALPHA:
417 blending |= FACTOR_DST|BLEND_DST;
418 break;
419
420 case GGL_ONE_MINUS_SRC_COLOR:
421 case GGL_SRC_COLOR:
422 blending |= FACTOR_SRC|BLEND_DST;
423 break;
424 case GGL_ONE_MINUS_SRC_ALPHA:
425 case GGL_SRC_ALPHA:
426 // no need to extract 'component' from the source
427 // for the blend factor, because we need ALPHA only.
428 blending |= BLEND_DST;
429 break;
430 }
431 return blending;
432 }
433
434 // ---------------------------------------------------------------------------
435
build_blendFOneMinusF(component_t & temp,const integer_t & factor,const integer_t & fragment,const integer_t & fb)436 void GGLAssembler::build_blendFOneMinusF(
437 component_t& temp,
438 const integer_t& factor,
439 const integer_t& fragment,
440 const integer_t& fb)
441 {
442 // R = S*f + D*(1-f) = (S-D)*f + D
443 Scratch scratches(registerFile());
444 // compute S-D
445 integer_t diff(fragment.flags & CORRUPTIBLE ?
446 fragment.reg : scratches.obtain(), fb.size(), CORRUPTIBLE);
447 const int shift = fragment.size() - fb.size();
448 if (shift>0) RSB(AL, 0, diff.reg, fb.reg, reg_imm(fragment.reg, LSR, shift));
449 else if (shift<0) RSB(AL, 0, diff.reg, fb.reg, reg_imm(fragment.reg, LSL,-shift));
450 else RSB(AL, 0, diff.reg, fb.reg, fragment.reg);
451 mul_factor_add(temp, diff, factor, component_t(fb));
452 }
453
build_blendOneMinusFF(component_t & temp,const integer_t & factor,const integer_t & fragment,const integer_t & fb)454 void GGLAssembler::build_blendOneMinusFF(
455 component_t& temp,
456 const integer_t& factor,
457 const integer_t& fragment,
458 const integer_t& fb)
459 {
460 // R = S*f + D*(1-f) = (S-D)*f + D
461 Scratch scratches(registerFile());
462 // compute D-S
463 integer_t diff(fb.flags & CORRUPTIBLE ?
464 fb.reg : scratches.obtain(), fb.size(), CORRUPTIBLE);
465 const int shift = fragment.size() - fb.size();
466 if (shift>0) SUB(AL, 0, diff.reg, fb.reg, reg_imm(fragment.reg, LSR, shift));
467 else if (shift<0) SUB(AL, 0, diff.reg, fb.reg, reg_imm(fragment.reg, LSL,-shift));
468 else SUB(AL, 0, diff.reg, fb.reg, fragment.reg);
469 mul_factor_add(temp, diff, factor, component_t(fragment));
470 }
471
472 // ---------------------------------------------------------------------------
473
mul_factor(component_t & d,const integer_t & v,const integer_t & f)474 void GGLAssembler::mul_factor( component_t& d,
475 const integer_t& v,
476 const integer_t& f)
477 {
478 int vs = v.size();
479 int fs = f.size();
480 int ms = vs+fs;
481
482 // XXX: we could have special cases for 1 bit mul
483
484 // all this code below to use the best multiply instruction
485 // wrt the parameters size. We take advantage of the fact
486 // that the 16-bits multiplies allow a 16-bit shift
487 // The trick is that we just make sure that we have at least 8-bits
488 // per component (which is enough for a 8 bits display).
489
490 int xy;
491 int vshift = 0;
492 int fshift = 0;
493 int smulw = 0;
494
495 if (vs<16) {
496 if (fs<16) {
497 xy = xyBB;
498 } else if (GGL_BETWEEN(fs, 24, 31)) {
499 ms -= 16;
500 xy = xyTB;
501 } else {
502 // eg: 15 * 18 -> 15 * 15
503 fshift = fs - 15;
504 ms -= fshift;
505 xy = xyBB;
506 }
507 } else if (GGL_BETWEEN(vs, 24, 31)) {
508 if (fs<16) {
509 ms -= 16;
510 xy = xyTB;
511 } else if (GGL_BETWEEN(fs, 24, 31)) {
512 ms -= 32;
513 xy = xyTT;
514 } else {
515 // eg: 24 * 18 -> 8 * 18
516 fshift = fs - 15;
517 ms -= 16 + fshift;
518 xy = xyTB;
519 }
520 } else {
521 if (fs<16) {
522 // eg: 18 * 15 -> 15 * 15
523 vshift = vs - 15;
524 ms -= vshift;
525 xy = xyBB;
526 } else if (GGL_BETWEEN(fs, 24, 31)) {
527 // eg: 18 * 24 -> 15 * 8
528 vshift = vs - 15;
529 ms -= 16 + vshift;
530 xy = xyBT;
531 } else {
532 // eg: 18 * 18 -> (15 * 18)>>16
533 fshift = fs - 15;
534 ms -= 16 + fshift;
535 xy = yB; //XXX SMULWB
536 smulw = 1;
537 }
538 }
539
540 ALOGE_IF(ms>=32, "mul_factor overflow vs=%d, fs=%d", vs, fs);
541
542 int vreg = v.reg;
543 int freg = f.reg;
544 if (vshift) {
545 MOV(AL, 0, d.reg, reg_imm(vreg, LSR, vshift));
546 vreg = d.reg;
547 }
548 if (fshift) {
549 MOV(AL, 0, d.reg, reg_imm(vreg, LSR, fshift));
550 freg = d.reg;
551 }
552 if (smulw) SMULW(AL, xy, d.reg, vreg, freg);
553 else SMUL(AL, xy, d.reg, vreg, freg);
554
555
556 d.h = ms;
557 if (mDithering) {
558 d.l = 0;
559 } else {
560 d.l = fs;
561 d.flags |= CLEAR_LO;
562 }
563 }
564
mul_factor_add(component_t & d,const integer_t & v,const integer_t & f,const component_t & a)565 void GGLAssembler::mul_factor_add( component_t& d,
566 const integer_t& v,
567 const integer_t& f,
568 const component_t& a)
569 {
570 // XXX: we could have special cases for 1 bit mul
571 Scratch scratches(registerFile());
572
573 int vs = v.size();
574 int fs = f.size();
575 int as = a.h;
576 int ms = vs+fs;
577
578 ALOGE_IF(ms>=32, "mul_factor_add overflow vs=%d, fs=%d, as=%d", vs, fs, as);
579
580 integer_t add(a.reg, a.h, a.flags);
581
582 // 'a' is a component_t but it is guaranteed to have
583 // its high bits set to 0. However in the dithering case,
584 // we can't get away with truncating the potentially bad bits
585 // so extraction is needed.
586
587 if ((mDithering) && (a.size() < ms)) {
588 // we need to expand a
589 if (!(a.flags & CORRUPTIBLE)) {
590 // ... but it's not corruptible, so we need to pick a
591 // temporary register.
592 // Try to uses the destination register first (it's likely
593 // to be usable, unless it aliases an input).
594 if (d.reg!=a.reg && d.reg!=v.reg && d.reg!=f.reg) {
595 add.reg = d.reg;
596 } else {
597 add.reg = scratches.obtain();
598 }
599 }
600 expand(add, a, ms); // extracts and expands
601 as = ms;
602 }
603
604 if (ms == as) {
605 if (vs<16 && fs<16) SMLABB(AL, d.reg, v.reg, f.reg, add.reg);
606 else MLA(AL, 0, d.reg, v.reg, f.reg, add.reg);
607 } else {
608 int temp = d.reg;
609 if (temp == add.reg) {
610 // the mul will modify add.reg, we need an intermediary reg
611 if (v.flags & CORRUPTIBLE) temp = v.reg;
612 else if (f.flags & CORRUPTIBLE) temp = f.reg;
613 else temp = scratches.obtain();
614 }
615
616 if (vs<16 && fs<16) SMULBB(AL, temp, v.reg, f.reg);
617 else MUL(AL, 0, temp, v.reg, f.reg);
618
619 if (ms>as) {
620 ADD(AL, 0, d.reg, temp, reg_imm(add.reg, LSL, ms-as));
621 } else if (ms<as) {
622 // not sure if we should expand the mul instead?
623 ADD(AL, 0, d.reg, temp, reg_imm(add.reg, LSR, as-ms));
624 }
625 }
626
627 d.h = ms;
628 if (mDithering) {
629 d.l = a.l;
630 } else {
631 d.l = fs>a.l ? fs : a.l;
632 d.flags |= CLEAR_LO;
633 }
634 }
635
component_add(component_t & d,const integer_t & dst,const integer_t & src)636 void GGLAssembler::component_add(component_t& d,
637 const integer_t& dst, const integer_t& src)
638 {
639 // here we're guaranteed that fragment.size() >= fb.size()
640 const int shift = src.size() - dst.size();
641 if (!shift) {
642 ADD(AL, 0, d.reg, src.reg, dst.reg);
643 } else {
644 ADD(AL, 0, d.reg, src.reg, reg_imm(dst.reg, LSL, shift));
645 }
646
647 d.h = src.size();
648 if (mDithering) {
649 d.l = 0;
650 } else {
651 d.l = shift;
652 d.flags |= CLEAR_LO;
653 }
654 }
655
component_sat(const component_t & v)656 void GGLAssembler::component_sat(const component_t& v)
657 {
658 const int one = ((1<<v.size())-1)<<v.l;
659 CMP(AL, v.reg, imm( 1<<v.h ));
660 if (isValidImmediate(one)) {
661 MOV(HS, 0, v.reg, imm( one ));
662 } else if (isValidImmediate(~one)) {
663 MVN(HS, 0, v.reg, imm( ~one ));
664 } else {
665 MOV(HS, 0, v.reg, imm( 1<<v.h ));
666 SUB(HS, 0, v.reg, v.reg, imm( 1<<v.l ));
667 }
668 }
669
670 // ----------------------------------------------------------------------------
671
672 }; // namespace android
673
674