1 /* libs/pixelflinger/codeflinger/GGLAssembler.cpp
2 **
3 ** Copyright 2006, The Android Open Source Project
4 **
5 ** Licensed under the Apache License, Version 2.0 (the "License");
6 ** you may not use this file except in compliance with the License.
7 ** You may obtain a copy of the License at
8 **
9 ** http://www.apache.org/licenses/LICENSE-2.0
10 **
11 ** Unless required by applicable law or agreed to in writing, software
12 ** distributed under the License is distributed on an "AS IS" BASIS,
13 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 ** See the License for the specific language governing permissions and
15 ** limitations under the License.
16 */
17
18 #define LOG_TAG "GGLAssembler"
19
20 #include <assert.h>
21 #include <stdint.h>
22 #include <stdio.h>
23 #include <stdlib.h>
24 #include <sys/types.h>
25
26 #include <log/log.h>
27
28 #include "GGLAssembler.h"
29
30 namespace android {
31
32 // ----------------------------------------------------------------------------
33
GGLAssembler(ARMAssemblerInterface * target)34 GGLAssembler::GGLAssembler(ARMAssemblerInterface* target)
35 : ARMAssemblerProxy(target),
36 RegisterAllocator(ARMAssemblerProxy::getCodegenArch()), mOptLevel(7)
37 {
38 }
39
~GGLAssembler()40 GGLAssembler::~GGLAssembler()
41 {
42 }
43
prolog()44 void GGLAssembler::prolog()
45 {
46 ARMAssemblerProxy::prolog();
47 }
48
epilog(uint32_t touched)49 void GGLAssembler::epilog(uint32_t touched)
50 {
51 ARMAssemblerProxy::epilog(touched);
52 }
53
reset(int opt_level)54 void GGLAssembler::reset(int opt_level)
55 {
56 ARMAssemblerProxy::reset();
57 RegisterAllocator::reset();
58 mOptLevel = opt_level;
59 }
60
61 // ---------------------------------------------------------------------------
62
scanline(const needs_t & needs,context_t const * c)63 int GGLAssembler::scanline(const needs_t& needs, context_t const* c)
64 {
65 int err = 0;
66 int opt_level = mOptLevel;
67 while (opt_level >= 0) {
68 reset(opt_level);
69 err = scanline_core(needs, c);
70 if (err == 0)
71 break;
72 opt_level--;
73 }
74
75 // XXX: in theory, pcForLabel is not valid before generate()
76 uint32_t* fragment_start_pc = pcForLabel("fragment_loop");
77 uint32_t* fragment_end_pc = pcForLabel("epilog");
78 const int per_fragment_ops = int(fragment_end_pc - fragment_start_pc);
79
80 // build a name for our pipeline
81 char name[64];
82 sprintf(name,
83 "scanline__%08X:%08X_%08X_%08X [%3d ipp]",
84 needs.p, needs.n, needs.t[0], needs.t[1], per_fragment_ops);
85
86 if (err) {
87 ALOGE("Error while generating ""%s""\n", name);
88 disassemble(name);
89 return -1;
90 }
91
92 return generate(name);
93 }
94
// Emits the whole scanline routine for the current optimization level.
//
// |needs| is first decoded into the per-pipeline member state (blend
// factors, alpha/depth test functions, masking, per-component info in
// mInfo[]). The code is then emitted in this order:
//   1. prolog() and build_scanline_prolog()
//   2. the "fragment_loop" body (dither index, depth test, textures,
//      framebuffer fetch, component computation, logic op, masking, store)
//   3. the iterator updates, the count decrement and the loop branch
//   4. the "epilog"
//   5. the discard paths ("discard_before_textures" /
//      "discard_after_textures") targeted by build_depth_test() and
//      build_alpha_test()
//
// Returns registerFile().status(). scanline() treats 0 as success and
// retries at a lower optimization level otherwise.
int GGLAssembler::scanline_core(const needs_t& needs, context_t const* c)
{
    // NOTE(review): 'duration' is not read again in this function.
    int64_t duration = ggl_system_time();

    mBlendFactorCached = 0;
    mBlending = 0;
    mMasking = 0;
    mAA = GGL_READ_NEEDS(P_AA, needs.p);
    mDithering = GGL_READ_NEEDS(P_DITHER, needs.p);
    // the test functions are stored in the needs relative to GGL_NEVER
    mAlphaTest = GGL_READ_NEEDS(P_ALPHA_TEST, needs.p) + GGL_NEVER;
    mDepthTest = GGL_READ_NEEDS(P_DEPTH_TEST, needs.p) + GGL_NEVER;
    mFog = GGL_READ_NEEDS(P_FOG, needs.p) != 0;
    mSmooth = GGL_READ_NEEDS(SHADE, needs.n) != 0;
    mBuilderContext.needs = needs;
    mBuilderContext.c = c;
    mBuilderContext.Rctx = reserveReg(R0); // context always in R0
    mCbFormat = c->formats[ GGL_READ_NEEDS(CB_FORMAT, needs.n) ];

    // ------------------------------------------------------------------------

    decodeLogicOpNeeds(needs);

    decodeTMUNeeds(needs, c);

    mBlendSrc = ggl_needs_to_blendfactor(GGL_READ_NEEDS(BLEND_SRC, needs.n));
    mBlendDst = ggl_needs_to_blendfactor(GGL_READ_NEEDS(BLEND_DST, needs.n));
    mBlendSrcA = ggl_needs_to_blendfactor(GGL_READ_NEEDS(BLEND_SRCA, needs.n));
    mBlendDstA = ggl_needs_to_blendfactor(GGL_READ_NEEDS(BLEND_DSTA, needs.n));

    // The color buffer has no alpha channel: every blend factor that refers
    // to destination alpha is replaced by GGL_ONE.
    if (!mCbFormat.c[GGLFormat::ALPHA].h) {
        if ((mBlendSrc == GGL_ONE_MINUS_DST_ALPHA) ||
            (mBlendSrc == GGL_DST_ALPHA)) {
            mBlendSrc = GGL_ONE;
        }
        if ((mBlendSrcA == GGL_ONE_MINUS_DST_ALPHA) ||
            (mBlendSrcA == GGL_DST_ALPHA)) {
            mBlendSrcA = GGL_ONE;
        }
        if ((mBlendDst == GGL_ONE_MINUS_DST_ALPHA) ||
            (mBlendDst == GGL_DST_ALPHA)) {
            mBlendDst = GGL_ONE;
        }
        if ((mBlendDstA == GGL_ONE_MINUS_DST_ALPHA) ||
            (mBlendDstA == GGL_DST_ALPHA)) {
            mBlendDstA = GGL_ONE;
        }
    }

    // if we need the framebuffer, read it now
    const int blending = blending_codes(mBlendSrc, mBlendDst) |
                         blending_codes(mBlendSrcA, mBlendDstA);

    // XXX: handle special cases, destination not modified...
    // (both branches below are intentionally empty placeholders)
    if ((mBlendSrc==GGL_ZERO) && (mBlendSrcA==GGL_ZERO) &&
        (mBlendDst==GGL_ONE) && (mBlendDstA==GGL_ONE)) {
        // Destination unmodified (beware of logic ops)
    } else if ((mBlendSrc==GGL_ZERO) && (mBlendSrcA==GGL_ZERO) &&
        (mBlendDst==GGL_ZERO) && (mBlendDstA==GGL_ZERO)) {
        // Destination is zero (beware of logic ops)
    }

    // Fill in mInfo[] for each of the 4 components and accumulate the
    // mBlending / mMasking bit sets (bit i <=> component i).
    int fbComponents = 0;
    const int masking = GGL_READ_NEEDS(MASK_ARGB, needs.n);
    for (int i=0 ; i<4 ; i++) {
        const int mask = 1<<i;
        component_info_t& info = mInfo[i];
        int fs = i==GGLFormat::ALPHA ? mBlendSrcA : mBlendSrc;
        int fd = i==GGLFormat::ALPHA ? mBlendDstA : mBlendDst;
        if (fs==GGL_SRC_ALPHA_SATURATE && i==GGLFormat::ALPHA)
            fs = GGL_ONE;
        info.masked = !!(masking & mask);
        // written to the destination: not masked, present in the color
        // buffer format, and the logic op (if any) uses the source
        info.inDest = !info.masked && mCbFormat.c[i].h &&
                      ((mLogicOp & LOGIC_OP_SRC) || (!mLogicOp));
        if (mCbFormat.components >= GGL_LUMINANCE &&
                (i==GGLFormat::GREEN || i==GGLFormat::BLUE)) {
            info.inDest = false;
        }
        info.needed = (i==GGLFormat::ALPHA) &&
                      (isAlphaSourceNeeded() || mAlphaTest != GGL_ALWAYS);
        info.replaced = !!(mTextureMachine.replaced & mask);
        info.iterated = (!info.replaced && (info.inDest || info.needed));
        info.smooth = mSmooth && info.iterated;
        info.fog = mFog && info.inDest && (i != GGLFormat::ALPHA);
        info.blend = (fs != int(GGL_ONE)) || (fd > int(GGL_ZERO));

        mBlending |= (info.blend ? mask : 0);
        mMasking |= (mCbFormat.c[i].h && info.masked) ? mask : 0;
        fbComponents |= mCbFormat.c[i].h ? mask : 0;
    }

    // every component present in the color buffer is masked:
    // nothing will be stored, so dithering is pointless
    mAllMasked = (mMasking == fbComponents);
    if (mAllMasked) {
        mDithering = 0;
    }

    fragment_parts_t parts;

    // ------------------------------------------------------------------------
    prolog();
    // ------------------------------------------------------------------------

    build_scanline_prolog(parts, needs);

    if (registerFile().status())
        return registerFile().status();

    // ------------------------------------------------------------------------
    label("fragment_loop");
    // ------------------------------------------------------------------------
    {
        Scratch regs(registerFile());

        if (mDithering) {
            // update the dither index.
            // (rotate, add 1 at the top of the word, rotate back)
            MOV(AL, 0, parts.count.reg,
                    reg_imm(parts.count.reg, ROR, GGL_DITHER_ORDER_SHIFT));
            ADD(AL, 0, parts.count.reg, parts.count.reg,
                    imm( 1 << (32 - GGL_DITHER_ORDER_SHIFT)));
            MOV(AL, 0, parts.count.reg,
                    reg_imm(parts.count.reg, ROR, 32 - GGL_DITHER_ORDER_SHIFT));
        }

        // XXX: could we do an early alpha-test here in some cases?
        // It would probably be used only with smooth-alpha and no texture
        // (or no alpha component in the texture).

        // Early z-test
        if (mAlphaTest==GGL_ALWAYS) {
            build_depth_test(parts, Z_TEST|Z_WRITE);
        } else {
            // we cannot do the z-write here, because
            // it might be killed by the alpha-test later
            build_depth_test(parts, Z_TEST);
        }

        { // texture coordinates
            Scratch scratches(registerFile());

            // texel generation
            build_textures(parts, regs);
            if (registerFile().status())
                return registerFile().status();
        }

        if ((blending & (FACTOR_DST|BLEND_DST)) ||
                (mMasking && !mAllMasked) ||
                (mLogicOp & LOGIC_OP_DST))
        {
            // blending / logic_op / masking need the framebuffer
            mDstPixel.setTo(regs.obtain(), &mCbFormat);

            // load the framebuffer pixel
            comment("fetch color-buffer");
            load(parts.cbPtr, mDstPixel);
        }

        if (registerFile().status())
            return registerFile().status();

        pixel_t pixel;
        int directTex = mTextureMachine.directTexture;
        if (directTex | parts.packed) {
            // note: we can't have both here
            // iterated color or direct texture
            pixel = directTex ? parts.texel[directTex-1] : parts.iterated;
            pixel.flags &= ~CORRUPTIBLE;
        } else {
            if (mDithering) {
                // fetch the dither value: ditherMatrix[count & (SIZE-1)],
                // addressed relative to the context register
                const int ctxtReg = mBuilderContext.Rctx;
                const int mask = GGL_DITHER_SIZE-1;
                parts.dither = reg_t(regs.obtain());
                AND(AL, 0, parts.dither.reg, parts.count.reg, imm(mask));
                ADDR_ADD(AL, 0, parts.dither.reg, ctxtReg, parts.dither.reg);
                LDRB(AL, parts.dither.reg, parts.dither.reg,
                        immed12_pre(GGL_OFFSETOF(ditherMatrix)));
            }

            // allocate a register for the resulting pixel
            pixel.setTo(regs.obtain(), &mCbFormat, FIRST);

            // alpha goes first: it carries the alpha-test
            build_component(pixel, parts, GGLFormat::ALPHA, regs);

            if (mAlphaTest!=GGL_ALWAYS) {
                // only handle the z-write part here. We know z-test
                // was successful, as well as alpha-test.
                build_depth_test(parts, Z_WRITE);
            }

            build_component(pixel, parts, GGLFormat::RED, regs);
            build_component(pixel, parts, GGLFormat::GREEN, regs);
            build_component(pixel, parts, GGLFormat::BLUE, regs);

            pixel.flags |= CORRUPTIBLE;
        }

        if (registerFile().status())
            return registerFile().status();

        if (pixel.reg == -1) {
            // be defensive here. if we're here it's probably
            // that this whole fragment is a no-op.
            pixel = mDstPixel;
        }

        if (!mAllMasked) {
            // logic operation
            build_logic_op(pixel, regs);

            // masking
            build_masking(pixel, regs);

            comment("store");
            store(parts.cbPtr, pixel, WRITE_BACK);
        }
    }

    if (registerFile().status())
        return registerFile().status();

    // update the iterated color...
    if (parts.reload != 3) {
        build_smooth_shade(parts);
    }

    // update iterated z
    build_iterate_z(parts);

    // update iterated fog
    build_iterate_f(parts);

    // count-1 lives in the upper 16 bits of parts.count (see
    // build_scanline_prolog): decrement it and loop while non-negative
    SUB(AL, S, parts.count.reg, parts.count.reg, imm(1<<16));
    B(PL, "fragment_loop");
    label("epilog");
    epilog(registerFile().touched());

    // Discard paths, emitted after the epilog. They are branched to when the
    // depth test ("discard_before_textures") or the alpha test
    // ("discard_after_textures") rejects a fragment: the iterators and the
    // color-buffer pointer are advanced without storing a pixel, then the
    // loop continues.
    if ((mAlphaTest!=GGL_ALWAYS) || (mDepthTest!=GGL_ALWAYS)) {
        if (mDepthTest!=GGL_ALWAYS) {
            label("discard_before_textures");
            build_iterate_texture_coordinates(parts);
        }
        label("discard_after_textures");
        build_smooth_shade(parts);
        build_iterate_z(parts);
        build_iterate_f(parts);
        if (!mAllMasked) {
            // skip one pixel (cbPtr.size is in bits)
            ADDR_ADD(AL, 0, parts.cbPtr.reg, parts.cbPtr.reg, imm(parts.cbPtr.size>>3));
        }
        SUB(AL, S, parts.count.reg, parts.count.reg, imm(1<<16));
        B(PL, "fragment_loop");
        epilog(registerFile().touched());
    }

    return registerFile().status();
}
349
350 // ---------------------------------------------------------------------------
351
// Emits the per-scanline setup code and fills in |parts| with the registers
// that stay live across the fragment loop:
//   - parts.count : pixel counter (plus dither offset when dithering)
//   - parts.cbPtr : color-buffer pointer (unless every component is masked)
//   - parts.z     : initial Z value (when depth test / Z mask is in use)
//   - parts.covPtr: coverage pointer (when anti-aliasing)
// It also stores the initial fog coordinate and the Z-buffer base into the
// context's generated_vars, and initializes the texture coordinates and the
// iterated color through init_textures() / init_iterated_color().
void GGLAssembler::build_scanline_prolog(
    fragment_parts_t& parts, const needs_t& needs)
{
    Scratch scratches(registerFile());
    int Rctx = mBuilderContext.Rctx;

    // compute count
    comment("compute ct (# of pixels to process)");
    parts.count.setTo(obtainReg());
    int Rx = scratches.obtain();
    int Ry = scratches.obtain();
    CONTEXT_LOAD(Rx, iterators.xl);
    CONTEXT_LOAD(parts.count.reg, iterators.xr);
    CONTEXT_LOAD(Ry, iterators.y);

    // parts.count = iterators.xr - Rx
    SUB(AL, 0, parts.count.reg, parts.count.reg, Rx);
    // ... minus one, so the fragment loop can run while the count is >= 0
    SUB(AL, 0, parts.count.reg, parts.count.reg, imm(1));

    if (mDithering) {
        // parts.count.reg = 0xNNNNXXDD
        // NNNN = count-1
        // DD   = dither offset
        // XX   = 0xxxxxxx (x = garbage)
        Scratch scratches(registerFile());
        int tx = scratches.obtain();
        int ty = scratches.obtain();
        AND(AL, 0, tx, Rx, imm(GGL_DITHER_MASK));
        AND(AL, 0, ty, Ry, imm(GGL_DITHER_MASK));
        ADD(AL, 0, tx, tx, reg_imm(ty, LSL, GGL_DITHER_ORDER_SHIFT));
        ORR(AL, 0, parts.count.reg, tx, reg_imm(parts.count.reg, LSL, 16));
    } else {
        // parts.count.reg = 0xNNNN0000
        // NNNN = count-1
        MOV(AL, 0, parts.count.reg, reg_imm(parts.count.reg, LSL, 16));
    }

    if (!mAllMasked) {
        // compute dst ptr
        comment("compute color-buffer pointer");
        const int cb_bits = mCbFormat.size*8;
        int Rs = scratches.obtain();
        parts.cbPtr.setTo(obtainReg(), cb_bits);
        CONTEXT_LOAD(Rs, state.buffers.color.stride);
        CONTEXT_ADDR_LOAD(parts.cbPtr.reg, state.buffers.color.data);
        SMLABB(AL, Rs, Ry, Rs, Rx);  // Rs = Rx + Ry*Rs
        // cbPtr = data + pixel index scaled by the pixel size
        base_offset(parts.cbPtr, parts.cbPtr, Rs);
        scratches.recycle(Rs);
    }

    // init fog
    const int need_fog = GGL_READ_NEEDS(P_FOG, needs.p);
    if (need_fog) {
        comment("compute initial fog coordinate");
        Scratch scratches(registerFile());
        int dfdx = scratches.obtain();
        int ydfdy = scratches.obtain();
        int f = ydfdy;      // result reuses the ydfdy register
        CONTEXT_LOAD(dfdx, generated_vars.dfdx);
        CONTEXT_LOAD(ydfdy, iterators.ydfdy);
        // f = Rx * dfdx + ydfdy
        MLA(AL, 0, f, Rx, dfdx, ydfdy);
        CONTEXT_STORE(f, generated_vars.f);
    }

    // init Z coordinate
    if ((mDepthTest != GGL_ALWAYS) || GGL_READ_NEEDS(P_MASK_Z, needs.p)) {
        parts.z = reg_t(obtainReg());
        comment("compute initial Z coordinate");
        Scratch scratches(registerFile());
        int dzdx = scratches.obtain();
        int ydzdy = parts.z.reg;    // loaded straight into the Z register
        CONTEXT_LOAD(dzdx, generated_vars.dzdx);   // 1.31 fixed-point
        CONTEXT_LOAD(ydzdy, iterators.ydzdy);      // 1.31 fixed-point
        // z = Rx * dzdx + ydzdy
        MLA(AL, 0, parts.z.reg, Rx, dzdx, ydzdy);

        // we're going to index zbase of parts.count
        // zbase = base + (xl-count + stride*y)*2
        int Rs = dzdx;      // dzdx is no longer needed; reuse its register
        int zbase = scratches.obtain();
        CONTEXT_LOAD(Rs, state.buffers.depth.stride);
        CONTEXT_ADDR_LOAD(zbase, state.buffers.depth.data);
        SMLABB(AL, Rs, Ry, Rs, Rx);
        ADD(AL, 0, Rs, Rs, reg_imm(parts.count.reg, LSR, 16));
        ADDR_ADD(AL, 0, zbase, zbase, reg_imm(Rs, LSL, 1));
        CONTEXT_ADDR_STORE(zbase, generated_vars.zbase);
    }

    // init texture coordinates
    init_textures(parts.coords, reg_t(Rx), reg_t(Ry));
    scratches.recycle(Ry);

    // iterated color
    init_iterated_color(parts, reg_t(Rx));

    // init coverage factor application (anti-aliasing)
    if (mAA) {
        parts.covPtr.setTo(obtainReg(), 16);
        CONTEXT_ADDR_LOAD(parts.covPtr.reg, state.buffers.coverage);
        // covPtr = coverage + Rx * 2
        ADDR_ADD(AL, 0, parts.covPtr.reg, parts.covPtr.reg, reg_imm(Rx, LSL, 1));
    }
}
453
454 // ---------------------------------------------------------------------------
455
build_component(pixel_t & pixel,const fragment_parts_t & parts,int component,Scratch & regs)456 void GGLAssembler::build_component( pixel_t& pixel,
457 const fragment_parts_t& parts,
458 int component,
459 Scratch& regs)
460 {
461 static char const * comments[] = {"alpha", "red", "green", "blue"};
462 comment(comments[component]);
463
464 // local register file
465 Scratch scratches(registerFile());
466 const int dst_component_size = pixel.component_size(component);
467
468 component_t temp(-1);
469 build_incoming_component( temp, dst_component_size,
470 parts, component, scratches, regs);
471
472 if (mInfo[component].inDest) {
473
474 // blending...
475 build_blending( temp, mDstPixel, component, scratches );
476
477 // downshift component and rebuild pixel...
478 downshift(pixel, component, temp, parts.dither);
479 }
480 }
481
// Produces in |temp| the incoming (source) value of |component|.
//
// When the component has to be extracted (see need_extract below), code is
// emitted for: iterated color, texture environment, optional expansion to
// |dst_size| bits and fog. For the alpha component it additionally emits the
// conversion to an integer value, the coverage application and the alpha
// test, and sets up mAlphaSource when blending needs the source alpha.
//
// Otherwise, if the component goes to the destination, |temp| simply selects
// the component inside the iterated color or inside a texel.
//
// |scratches| is the caller's per-component register scope; |global_regs| is
// used instead when the value must outlive the component (source alpha
// needed by the blending stage).
void GGLAssembler::build_incoming_component(
                                    component_t& temp,
                                    int dst_size,
                                    const fragment_parts_t& parts,
                                    int component,
                                    Scratch& scratches,
                                    Scratch& global_regs)
{
    const uint32_t component_mask = 1<<component;

    // Figure out what we need for the blending stage...
    int fs = component==GGLFormat::ALPHA ? mBlendSrcA : mBlendSrc;
    int fd = component==GGLFormat::ALPHA ? mBlendDstA : mBlendDst;
    if (fs==GGL_SRC_ALPHA_SATURATE && component==GGLFormat::ALPHA) {
        fs = GGL_ONE;
    }

    // Figure out what we need to extract and for what reason
    const int blending = blending_codes(fs, fd);

    // Are we actually going to blend?
    const int need_blending = (fs != int(GGL_ONE)) || (fd > int(GGL_ZERO));

    // expand the source if the destination has more bits
    // NOTE(review): this loop stops at GGL_TEXTURE_UNIT_COUNT-1 whereas the
    // loop at the end of this function visits every unit -- confirm the
    // last unit is skipped on purpose.
    int need_expander = false;
    for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT-1 ; i++) {
        texture_unit_t& tmu = mTextureMachine.tmu[i];
        if ((tmu.format_idx) &&
            (parts.texel[i].component_size(component) < dst_size)) {
            need_expander = true;
        }
    }

    // do we need to extract this component?
    const bool multiTexture = mTextureMachine.activeUnits > 1;
    const int blend_needs_alpha_source = (component==GGLFormat::ALPHA) &&
                                        (isAlphaSourceNeeded());
    int need_extract = mInfo[component].needed;
    if (mInfo[component].inDest)
    {
        need_extract |= ((need_blending ?
                (blending & (BLEND_SRC|FACTOR_SRC)) : need_expander));
        need_extract |= (mTextureMachine.mask != mTextureMachine.replaced);
        need_extract |= mInfo[component].smooth;
        need_extract |= mInfo[component].fog;
        need_extract |= mDithering;
        need_extract |= multiTexture;
    }

    if (need_extract) {
        // the source alpha must survive past this component when the
        // blending stage needs it, hence the longer-lived register scope
        Scratch& regs = blend_needs_alpha_source ? global_regs : scratches;
        component_t fragment;

        // iterated color
        build_iterated_color(fragment, parts, component, regs);

        // texture environment (decal, modulate, replace)
        build_texture_environment(fragment, parts, component, regs);

        // expand the source if the destination has more bits
        if (need_expander && (fragment.size() < dst_size)) {
            // we're here only if we fetched a texel
            // (so we know for sure fragment is CORRUPTIBLE)
            expand(fragment, fragment, dst_size);
        }

        // We have a few specific things to do for the alpha-channel
        if ((component==GGLFormat::ALPHA) &&
            (mInfo[component].needed || fragment.size()<dst_size))
        {
            // convert to integer_t first and make sure
            // we don't corrupt a needed register
            if (fragment.l) {
                component_t incoming(fragment);
                modify(fragment, regs);
                // shift the value down so that its low bit is bit 0
                MOV(AL, 0, fragment.reg, reg_imm(incoming.reg, LSR, incoming.l));
                fragment.h -= fragment.l;
                fragment.l = 0;
            }

            // coverage factor application
            build_coverage_application(fragment, parts, regs);

            // alpha-test
            build_alpha_test(fragment, parts);

            if (blend_needs_alpha_source) {
                // We keep only 8 bits for the blending stage
                const int shift = fragment.h <= 8 ? 0 : fragment.h-8;
                if (fragment.flags & CORRUPTIBLE) {
                    // take over the fragment's register as the alpha source
                    fragment.flags &= ~CORRUPTIBLE;
                    mAlphaSource.setTo(fragment.reg,
                            fragment.size(), fragment.flags);
                    if (shift) {
                        MOV(AL, 0, mAlphaSource.reg,
                            reg_imm(mAlphaSource.reg, LSR, shift));
                    }
                } else {
                    // XXX: it would better to do this in build_blend_factor()
                    // so we can avoid the extra MOV below.
                    mAlphaSource.setTo(regs.obtain(),
                            fragment.size(), CORRUPTIBLE);
                    if (shift) {
                        MOV(AL, 0, mAlphaSource.reg,
                            reg_imm(fragment.reg, LSR, shift));
                    } else {
                        MOV(AL, 0, mAlphaSource.reg, fragment.reg);
                    }
                }
                mAlphaSource.s -= shift;
            }
        }

        // fog...
        build_fog( fragment, component, regs );

        temp = fragment;
    } else {
        if (mInfo[component].inDest) {
            // extraction not needed and replace
            // we just select the right component
            if ((mTextureMachine.replaced & component_mask) == 0) {
                // component wasn't replaced, so use it!
                temp = component_t(parts.iterated, component);
            }
            // a later unit satisfying the condition overrides an earlier one
            for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; i++) {
                const texture_unit_t& tmu = mTextureMachine.tmu[i];
                if ((tmu.mask & component_mask) &&
                    ((tmu.replaced & component_mask) == 0)) {
                    temp = component_t(parts.texel[i], component);
                }
            }
        }
    }
}
617
isAlphaSourceNeeded() const618 bool GGLAssembler::isAlphaSourceNeeded() const
619 {
620 // XXX: also needed for alpha-test
621 const int bs = mBlendSrc;
622 const int bd = mBlendDst;
623 return bs==GGL_SRC_ALPHA_SATURATE ||
624 bs==GGL_SRC_ALPHA || bs==GGL_ONE_MINUS_SRC_ALPHA ||
625 bd==GGL_SRC_ALPHA || bd==GGL_ONE_MINUS_SRC_ALPHA ;
626 }
627
628 // ---------------------------------------------------------------------------
629
build_smooth_shade(const fragment_parts_t & parts)630 void GGLAssembler::build_smooth_shade(const fragment_parts_t& parts)
631 {
632 if (mSmooth && !parts.iterated_packed) {
633 // update the iterated color in a pipelined way...
634 comment("update iterated color");
635 Scratch scratches(registerFile());
636
637 const int reload = parts.reload;
638 for (int i=0 ; i<4 ; i++) {
639 if (!mInfo[i].iterated)
640 continue;
641
642 int c = parts.argb[i].reg;
643 int dx = parts.argb_dx[i].reg;
644
645 if (reload & 1) {
646 c = scratches.obtain();
647 CONTEXT_LOAD(c, generated_vars.argb[i].c);
648 }
649 if (reload & 2) {
650 dx = scratches.obtain();
651 CONTEXT_LOAD(dx, generated_vars.argb[i].dx);
652 }
653
654 if (mSmooth) {
655 ADD(AL, 0, c, c, dx);
656 }
657
658 if (reload & 1) {
659 CONTEXT_STORE(c, generated_vars.argb[i].c);
660 scratches.recycle(c);
661 }
662 if (reload & 2) {
663 scratches.recycle(dx);
664 }
665 }
666 }
667 }
668
669 // ---------------------------------------------------------------------------
670
build_coverage_application(component_t & fragment,const fragment_parts_t & parts,Scratch & regs)671 void GGLAssembler::build_coverage_application(component_t& fragment,
672 const fragment_parts_t& parts, Scratch& regs)
673 {
674 // here fragment.l is guarenteed to be 0
675 if (mAA) {
676 // coverages are 1.15 fixed-point numbers
677 comment("coverage application");
678
679 component_t incoming(fragment);
680 modify(fragment, regs);
681
682 Scratch scratches(registerFile());
683 int cf = scratches.obtain();
684 LDRH(AL, cf, parts.covPtr.reg, immed8_post(2));
685 if (fragment.h > 31) {
686 fragment.h--;
687 SMULWB(AL, fragment.reg, incoming.reg, cf);
688 } else {
689 MOV(AL, 0, fragment.reg, reg_imm(incoming.reg, LSL, 1));
690 SMULWB(AL, fragment.reg, fragment.reg, cf);
691 }
692 }
693 }
694
695 // ---------------------------------------------------------------------------
696
build_alpha_test(component_t & fragment,const fragment_parts_t &)697 void GGLAssembler::build_alpha_test(component_t& fragment,
698 const fragment_parts_t& /*parts*/)
699 {
700 if (mAlphaTest != GGL_ALWAYS) {
701 comment("Alpha Test");
702 Scratch scratches(registerFile());
703 int ref = scratches.obtain();
704 const int shift = GGL_COLOR_BITS-fragment.size();
705 CONTEXT_LOAD(ref, state.alpha_test.ref);
706 if (shift) CMP(AL, fragment.reg, reg_imm(ref, LSR, shift));
707 else CMP(AL, fragment.reg, ref);
708 int cc = NV;
709 switch (mAlphaTest) {
710 case GGL_NEVER: cc = NV; break;
711 case GGL_LESS: cc = LT; break;
712 case GGL_EQUAL: cc = EQ; break;
713 case GGL_LEQUAL: cc = LS; break;
714 case GGL_GREATER: cc = HI; break;
715 case GGL_NOTEQUAL: cc = NE; break;
716 case GGL_GEQUAL: cc = HS; break;
717 }
718 B(cc^1, "discard_after_textures");
719 }
720 }
721
722 // ---------------------------------------------------------------------------
723
// Emits the depth test and/or the depth write for the current fragment.
//
// |mask| selects what to emit: Z_TEST, Z_WRITE or both. Nothing is emitted
// unless a depth function other than GGL_ALWAYS is set or P_MASK_Z is set in
// the needs. The Z_WRITE part is dropped when P_MASK_Z is not set, and the
// Z_TEST part is dropped when the function is GGL_ALWAYS. A failed test
// branches to "discard_before_textures".
void GGLAssembler::build_depth_test(
        const fragment_parts_t& parts, uint32_t mask)
{
    mask &= Z_TEST|Z_WRITE;
    const needs_t& needs = mBuilderContext.needs;
    const int zmask = GGL_READ_NEEDS(P_MASK_Z, needs.p);
    Scratch scratches(registerFile());

    if (mDepthTest != GGL_ALWAYS || zmask) {
        // ic: condition under which the fragment passes (after the CMP of
        //     the stored depth against the new z, operands in that order)
        // cc: its inverse, i.e. the condition for discarding
        int cc=AL, ic=AL;
        switch (mDepthTest) {
        case GGL_LESS:      ic = HI;    break;
        case GGL_EQUAL:     ic = EQ;    break;
        case GGL_LEQUAL:    ic = HS;    break;
        case GGL_GREATER:   ic = LT;    break;
        case GGL_NOTEQUAL:  ic = NE;    break;
        case GGL_GEQUAL:    ic = LS;    break;
        case GGL_NEVER:
            // this never happens, because it's taken care of when
            // computing the needs. but we keep it for completeness.
            comment("Depth Test (NEVER)");
            B(AL, "discard_before_textures");
            return;
        case GGL_ALWAYS:
            // we're here because zmask is enabled
            mask &= ~Z_TEST;    // test always passes.
            break;
        }

        // inverse the condition
        cc = ic^1;

        // no depth write unless P_MASK_Z is set in the needs
        if ((mask & Z_WRITE) && !zmask) {
            mask &= ~Z_WRITE;
        }

        if (!mask)
            return;

        comment("Depth Test");

        int zbase = scratches.obtain();
        int depth = scratches.obtain();
        int z = parts.z.reg;

        CONTEXT_ADDR_LOAD(zbase, generated_vars.zbase);  // stall
        ADDR_SUB(AL, 0, zbase, zbase, reg_imm(parts.count.reg, LSR, 15));
            // above does zbase = zbase - (count >> 15), which equals
            // ((count >> 16) << 1) when bit 15 of count is clear
            // NOTE(review): confirm bit 15 of count is always 0 here

        if (mask & Z_TEST) {
            LDRH(AL, depth, zbase);  // stall
            // only the upper 16 bits of z are compared / stored
            CMP(AL, depth, reg_imm(z, LSR, 16));
            B(cc, "discard_before_textures");
        }
        if (mask & Z_WRITE) {
            if (mask == Z_WRITE) {
                // only z-write asked, cc is meaningless
                ic = AL;
            }
            MOV(AL, 0, depth, reg_imm(z, LSR, 16));
            STRH(ic, depth, zbase);
        }
    }
}
788
build_iterate_z(const fragment_parts_t & parts)789 void GGLAssembler::build_iterate_z(const fragment_parts_t& parts)
790 {
791 const needs_t& needs = mBuilderContext.needs;
792 if ((mDepthTest != GGL_ALWAYS) || GGL_READ_NEEDS(P_MASK_Z, needs.p)) {
793 Scratch scratches(registerFile());
794 int dzdx = scratches.obtain();
795 CONTEXT_LOAD(dzdx, generated_vars.dzdx); // stall
796 ADD(AL, 0, parts.z.reg, parts.z.reg, dzdx);
797 }
798 }
799
build_iterate_f(const fragment_parts_t &)800 void GGLAssembler::build_iterate_f(const fragment_parts_t& /*parts*/)
801 {
802 const needs_t& needs = mBuilderContext.needs;
803 if (GGL_READ_NEEDS(P_FOG, needs.p)) {
804 Scratch scratches(registerFile());
805 int dfdx = scratches.obtain();
806 int f = scratches.obtain();
807 CONTEXT_LOAD(f, generated_vars.f);
808 CONTEXT_LOAD(dfdx, generated_vars.dfdx); // stall
809 ADD(AL, 0, f, f, dfdx);
810 CONTEXT_STORE(f, generated_vars.f);
811 }
812 }
813
814 // ---------------------------------------------------------------------------
815
build_logic_op(pixel_t & pixel,Scratch & regs)816 void GGLAssembler::build_logic_op(pixel_t& pixel, Scratch& regs)
817 {
818 const needs_t& needs = mBuilderContext.needs;
819 const int opcode = GGL_READ_NEEDS(LOGIC_OP, needs.n) | GGL_CLEAR;
820 if (opcode == GGL_COPY)
821 return;
822
823 comment("logic operation");
824
825 pixel_t s(pixel);
826 if (!(pixel.flags & CORRUPTIBLE)) {
827 pixel.reg = regs.obtain();
828 pixel.flags |= CORRUPTIBLE;
829 }
830
831 pixel_t d(mDstPixel);
832 switch(opcode) {
833 case GGL_CLEAR: MOV(AL, 0, pixel.reg, imm(0)); break;
834 case GGL_AND: AND(AL, 0, pixel.reg, s.reg, d.reg); break;
835 case GGL_AND_REVERSE: BIC(AL, 0, pixel.reg, s.reg, d.reg); break;
836 case GGL_COPY: break;
837 case GGL_AND_INVERTED: BIC(AL, 0, pixel.reg, d.reg, s.reg); break;
838 case GGL_NOOP: MOV(AL, 0, pixel.reg, d.reg); break;
839 case GGL_XOR: EOR(AL, 0, pixel.reg, s.reg, d.reg); break;
840 case GGL_OR: ORR(AL, 0, pixel.reg, s.reg, d.reg); break;
841 case GGL_NOR: ORR(AL, 0, pixel.reg, s.reg, d.reg);
842 MVN(AL, 0, pixel.reg, pixel.reg); break;
843 case GGL_EQUIV: EOR(AL, 0, pixel.reg, s.reg, d.reg);
844 MVN(AL, 0, pixel.reg, pixel.reg); break;
845 case GGL_INVERT: MVN(AL, 0, pixel.reg, d.reg); break;
846 case GGL_OR_REVERSE: // s | ~d == ~(~s & d)
847 BIC(AL, 0, pixel.reg, d.reg, s.reg);
848 MVN(AL, 0, pixel.reg, pixel.reg); break;
849 case GGL_COPY_INVERTED: MVN(AL, 0, pixel.reg, s.reg); break;
850 case GGL_OR_INVERTED: // ~s | d == ~(s & ~d)
851 BIC(AL, 0, pixel.reg, s.reg, d.reg);
852 MVN(AL, 0, pixel.reg, pixel.reg); break;
853 case GGL_NAND: AND(AL, 0, pixel.reg, s.reg, d.reg);
854 MVN(AL, 0, pixel.reg, pixel.reg); break;
855 case GGL_SET: MVN(AL, 0, pixel.reg, imm(0)); break;
856 };
857 }
858
859 // ---------------------------------------------------------------------------
860
// Returns the position (always even) of the lowest 2-bit group of |val| that
// contains a set bit, or 32 when |val| is 0.
// The probe uses an unsigned literal so that shifting it up to bit 30 is
// well defined, and the loop is bounded so a zero input cannot run the shift
// count past the word size.
static uint32_t find_bottom(uint32_t val)
{
    uint32_t i = 0;
    while ((i < 32) && !(val & (uint32_t(3) << i)))
        i += 2;
    return i;
}
868
// Rotates |val| right, two bits at a time, until its two lowest bits are not
// both zero and its six highest bits are all zero. |rot| receives the total
// right-rotation applied. If no such rotation exists (a full 32-bit turn has
// been made), |val| is back to its initial value and |rot| is 0.
static void normalize(uint32_t& val, uint32_t& rot)
{
    rot = 0;
    for (;;) {
        const bool lowPairEmpty = (val & 3) == 0;
        const bool topBitsUsed = (val & 0xFC000000) != 0;
        if (!lowPairEmpty && !topBitsUsed)
            return;     // already in normalized form

        // rotate right by 2
        val = (val >> 2) | ((val & 3) << 30);
        rot += 2;

        if (rot == 32) {
            // went all the way around: give up
            rot = 0;
            return;
        }
    }
}
884
build_and_immediate(int d,int s,uint32_t mask,int bits)885 void GGLAssembler::build_and_immediate(int d, int s, uint32_t mask, int bits)
886 {
887 uint32_t rot;
888 uint32_t size = ((bits>=32) ? 0 : (1LU << bits)) - 1;
889 mask &= size;
890
891 if (mask == size) {
892 if (d != s)
893 MOV( AL, 0, d, s);
894 return;
895 }
896
897 if ((getCodegenArch() == CODEGEN_ARCH_MIPS) ||
898 (getCodegenArch() == CODEGEN_ARCH_MIPS64)) {
899 // MIPS can do 16-bit imm in 1 instr, 32-bit in 3 instr
900 // the below ' while (mask)' code is buggy on mips
901 // since mips returns true on isValidImmediate()
902 // then we get multiple AND instr (positive logic)
903 AND( AL, 0, d, s, imm(mask) );
904 return;
905 }
906 else if (getCodegenArch() == CODEGEN_ARCH_ARM64) {
907 AND( AL, 0, d, s, imm(mask) );
908 return;
909 }
910
911 int negative_logic = !isValidImmediate(mask);
912 if (negative_logic) {
913 mask = ~mask & size;
914 }
915 normalize(mask, rot);
916
917 if (mask) {
918 while (mask) {
919 uint32_t bitpos = find_bottom(mask);
920 int shift = rot + bitpos;
921 uint32_t m = mask & (0xff << bitpos);
922 mask &= ~m;
923 m >>= bitpos;
924 int32_t newMask = (m<<shift) | (m>>(32-shift));
925 if (!negative_logic) {
926 AND( AL, 0, d, s, imm(newMask) );
927 } else {
928 BIC( AL, 0, d, s, imm(newMask) );
929 }
930 s = d;
931 }
932 } else {
933 MOV( AL, 0, d, imm(0));
934 }
935 }
936
build_masking(pixel_t & pixel,Scratch & regs)937 void GGLAssembler::build_masking(pixel_t& pixel, Scratch& regs)
938 {
939 if (!mMasking || mAllMasked) {
940 return;
941 }
942
943 comment("color mask");
944
945 pixel_t fb(mDstPixel);
946 pixel_t s(pixel);
947 if (!(pixel.flags & CORRUPTIBLE)) {
948 pixel.reg = regs.obtain();
949 pixel.flags |= CORRUPTIBLE;
950 }
951
952 int mask = 0;
953 for (int i=0 ; i<4 ; i++) {
954 const int component_mask = 1<<i;
955 const int h = fb.format.c[i].h;
956 const int l = fb.format.c[i].l;
957 if (h && (!(mMasking & component_mask))) {
958 mask |= ((1<<(h-l))-1) << l;
959 }
960 }
961
962 // There is no need to clear the masked components of the source
963 // (unless we applied a logic op), because they're already zeroed
964 // by construction (masked components are not computed)
965
966 if (mLogicOp) {
967 const needs_t& needs = mBuilderContext.needs;
968 const int opcode = GGL_READ_NEEDS(LOGIC_OP, needs.n) | GGL_CLEAR;
969 if (opcode != GGL_CLEAR) {
970 // clear masked component of source
971 build_and_immediate(pixel.reg, s.reg, mask, fb.size());
972 s = pixel;
973 }
974 }
975
976 // clear non masked components of destination
977 build_and_immediate(fb.reg, fb.reg, ~mask, fb.size());
978
979 // or back the channels that were masked
980 if (s.reg == fb.reg) {
981 // this is in fact a MOV
982 if (s.reg == pixel.reg) {
983 // ugh. this in in fact a nop
984 } else {
985 MOV(AL, 0, pixel.reg, fb.reg);
986 }
987 } else {
988 ORR(AL, 0, pixel.reg, s.reg, fb.reg);
989 }
990 }
991
992 // ---------------------------------------------------------------------------
993
base_offset(const pointer_t & d,const pointer_t & b,const reg_t & o)994 void GGLAssembler::base_offset(
995 const pointer_t& d, const pointer_t& b, const reg_t& o)
996 {
997 switch (b.size) {
998 case 32:
999 ADDR_ADD(AL, 0, d.reg, b.reg, reg_imm(o.reg, LSL, 2));
1000 break;
1001 case 24:
1002 if (d.reg == b.reg) {
1003 ADDR_ADD(AL, 0, d.reg, b.reg, reg_imm(o.reg, LSL, 1));
1004 ADDR_ADD(AL, 0, d.reg, d.reg, o.reg);
1005 } else {
1006 ADDR_ADD(AL, 0, d.reg, o.reg, reg_imm(o.reg, LSL, 1));
1007 ADDR_ADD(AL, 0, d.reg, d.reg, b.reg);
1008 }
1009 break;
1010 case 16:
1011 ADDR_ADD(AL, 0, d.reg, b.reg, reg_imm(o.reg, LSL, 1));
1012 break;
1013 case 8:
1014 ADDR_ADD(AL, 0, d.reg, b.reg, o.reg);
1015 break;
1016 }
1017 }
1018
1019 // ----------------------------------------------------------------------------
1020 // cheezy register allocator...
1021 // ----------------------------------------------------------------------------
1022
// Modified to support MIPS processors, in a very simple way. We retain the
// (ARM) limit of 16 total registers, but shift the mapping of those registers
// from 0-15 to 2-17. Register 0 on MIPS cannot be used as a GP register, and
// register 1 has a traditional use as a temp.
1027
RegisterAllocator(int arch)1028 RegisterAllocator::RegisterAllocator(int arch) : mRegs(arch)
1029 {
1030 }
1031
reset()1032 void RegisterAllocator::reset()
1033 {
1034 mRegs.reset();
1035 }
1036
reserveReg(int reg)1037 int RegisterAllocator::reserveReg(int reg)
1038 {
1039 return mRegs.reserve(reg);
1040 }
1041
obtainReg()1042 int RegisterAllocator::obtainReg()
1043 {
1044 return mRegs.obtain();
1045 }
1046
recycleReg(int reg)1047 void RegisterAllocator::recycleReg(int reg)
1048 {
1049 mRegs.recycle(reg);
1050 }
1051
registerFile()1052 RegisterAllocator::RegisterFile& RegisterAllocator::registerFile()
1053 {
1054 return mRegs;
1055 }
1056
1057 // ----------------------------------------------------------------------------
1058
RegisterFile(int codegen_arch)1059 RegisterAllocator::RegisterFile::RegisterFile(int codegen_arch)
1060 : mRegs(0), mTouched(0), mStatus(0), mArch(codegen_arch), mRegisterOffset(0)
1061 {
1062 if ((mArch == ARMAssemblerInterface::CODEGEN_ARCH_MIPS) ||
1063 (mArch == ARMAssemblerInterface::CODEGEN_ARCH_MIPS64)) {
1064 mRegisterOffset = 2; // ARM has regs 0..15, MIPS offset to 2..17
1065 }
1066 reserve(ARMAssemblerInterface::SP);
1067 reserve(ARMAssemblerInterface::PC);
1068 }
1069
RegisterFile(const RegisterFile & rhs,int codegen_arch)1070 RegisterAllocator::RegisterFile::RegisterFile(const RegisterFile& rhs, int codegen_arch)
1071 : mRegs(rhs.mRegs), mTouched(rhs.mTouched), mArch(codegen_arch), mRegisterOffset(0)
1072 {
1073 if ((mArch == ARMAssemblerInterface::CODEGEN_ARCH_MIPS) ||
1074 (mArch == ARMAssemblerInterface::CODEGEN_ARCH_MIPS64)) {
1075 mRegisterOffset = 2; // ARM has regs 0..15, MIPS offset to 2..17
1076 }
1077 }
1078
~RegisterFile()1079 RegisterAllocator::RegisterFile::~RegisterFile()
1080 {
1081 }
1082
operator ==(const RegisterFile & rhs) const1083 bool RegisterAllocator::RegisterFile::operator == (const RegisterFile& rhs) const
1084 {
1085 return (mRegs == rhs.mRegs);
1086 }
1087
reset()1088 void RegisterAllocator::RegisterFile::reset()
1089 {
1090 mRegs = mTouched = mStatus = 0;
1091 reserve(ARMAssemblerInterface::SP);
1092 reserve(ARMAssemblerInterface::PC);
1093 }
1094
1095 // RegisterFile::reserve() take a register parameter in the
1096 // range 0-15 (Arm compatible), but on a Mips processor, will
1097 // return the actual allocated register in the range 2-17.
reserve(int reg)1098 int RegisterAllocator::RegisterFile::reserve(int reg)
1099 {
1100 reg += mRegisterOffset;
1101 LOG_ALWAYS_FATAL_IF(isUsed(reg),
1102 "reserving register %d, but already in use",
1103 reg);
1104 mRegs |= (1<<reg);
1105 mTouched |= mRegs;
1106 return reg;
1107 }
1108
1109 // This interface uses regMask in range 2-17 on MIPS, no translation.
reserveSeveral(uint32_t regMask)1110 void RegisterAllocator::RegisterFile::reserveSeveral(uint32_t regMask)
1111 {
1112 mRegs |= regMask;
1113 mTouched |= regMask;
1114 }
1115
isUsed(int reg) const1116 int RegisterAllocator::RegisterFile::isUsed(int reg) const
1117 {
1118 LOG_ALWAYS_FATAL_IF(reg>=16+(int)mRegisterOffset, "invalid register %d", reg);
1119 return mRegs & (1<<reg);
1120 }
1121
obtain()1122 int RegisterAllocator::RegisterFile::obtain()
1123 {
1124 const char priorityList[14] = { 0, 1, 2, 3,
1125 12, 14, 4, 5,
1126 6, 7, 8, 9,
1127 10, 11 };
1128 const int nbreg = sizeof(priorityList);
1129 int i, r, reg;
1130 for (i=0 ; i<nbreg ; i++) {
1131 r = priorityList[i];
1132 if (!isUsed(r + mRegisterOffset)) {
1133 break;
1134 }
1135 }
1136 // this is not an error anymore because, we'll try again with
1137 // a lower optimization level.
1138 //ALOGE_IF(i >= nbreg, "pixelflinger ran out of registers\n");
1139 if (i >= nbreg) {
1140 mStatus |= OUT_OF_REGISTERS;
1141 // we return SP so we can more easily debug things
1142 // the code will never be run anyway.
1143 return ARMAssemblerInterface::SP;
1144 }
1145 reg = reserve(r); // Param in Arm range 0-15, returns range 2-17 on Mips.
1146 return reg;
1147 }
1148
hasFreeRegs() const1149 bool RegisterAllocator::RegisterFile::hasFreeRegs() const
1150 {
1151 uint32_t regs = mRegs >> mRegisterOffset; // MIPS fix.
1152 return ((regs & 0xFFFF) == 0xFFFF) ? false : true;
1153 }
1154
countFreeRegs() const1155 int RegisterAllocator::RegisterFile::countFreeRegs() const
1156 {
1157 uint32_t regs = mRegs >> mRegisterOffset; // MIPS fix.
1158 int f = ~regs & 0xFFFF;
1159 // now count number of 1
1160 f = (f & 0x5555) + ((f>>1) & 0x5555);
1161 f = (f & 0x3333) + ((f>>2) & 0x3333);
1162 f = (f & 0x0F0F) + ((f>>4) & 0x0F0F);
1163 f = (f & 0x00FF) + ((f>>8) & 0x00FF);
1164 return f;
1165 }
1166
recycle(int reg)1167 void RegisterAllocator::RegisterFile::recycle(int reg)
1168 {
1169 // commented out, since common failure of running out of regs
1170 // triggers this assertion. Since the code is not execectued
1171 // in that case, it does not matter. No reason to FATAL err.
1172 // LOG_FATAL_IF(!isUsed(reg),
1173 // "recycling unallocated register %d",
1174 // reg);
1175 mRegs &= ~(1<<reg);
1176 }
1177
recycleSeveral(uint32_t regMask)1178 void RegisterAllocator::RegisterFile::recycleSeveral(uint32_t regMask)
1179 {
1180 // commented out, since common failure of running out of regs
1181 // triggers this assertion. Since the code is not execectued
1182 // in that case, it does not matter. No reason to FATAL err.
1183 // LOG_FATAL_IF((mRegs & regMask)!=regMask,
1184 // "recycling unallocated registers "
1185 // "(recycle=%08x, allocated=%08x, unallocated=%08x)",
1186 // regMask, mRegs, mRegs®Mask);
1187 mRegs &= ~regMask;
1188 }
1189
touched() const1190 uint32_t RegisterAllocator::RegisterFile::touched() const
1191 {
1192 return mTouched;
1193 }
1194
1195 // ----------------------------------------------------------------------------
1196
1197 }; // namespace android
1198
1199