1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //    http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "QuadRasterizer.hpp"
16 
17 #include "Primitive.hpp"
18 #include "Renderer.hpp"
19 #include "Shader/Constants.hpp"
20 #include "Common/Math.hpp"
21 #include "Common/Debug.hpp"
22 
23 namespace sw
24 {
25 	extern bool veryEarlyDepthTest;
26 	extern bool complementaryDepthBuffer;
27 	extern bool fullPixelPositionRegister;
28 
29 	extern int clusterCount;
30 
QuadRasterizer(const PixelProcessor::State & state,const PixelShader * pixelShader)31 	QuadRasterizer::QuadRasterizer(const PixelProcessor::State &state, const PixelShader *pixelShader) : state(state), shader(pixelShader)
32 	{
33 	}
34 
~QuadRasterizer()35 	QuadRasterizer::~QuadRasterizer()
36 	{
37 	}
38 
generate()39 	void QuadRasterizer::generate()
40 	{
41 		#if PERF_PROFILE
42 			for(int i = 0; i < PERF_TIMERS; i++)
43 			{
44 				cycles[i] = 0;
45 			}
46 
47 			Long pixelTime = Ticks();
48 		#endif
49 
50 		constants = *Pointer<Pointer<Byte>>(data + OFFSET(DrawData,constants));
51 		occlusion = 0;
52 		int clusterCount = Renderer::getClusterCount();
53 
54 		Do
55 		{
56 			Int yMin = *Pointer<Int>(primitive + OFFSET(Primitive,yMin));
57 			Int yMax = *Pointer<Int>(primitive + OFFSET(Primitive,yMax));
58 
59 			Int cluster2 = cluster + cluster;
60 			yMin += clusterCount * 2 - 2 - cluster2;
61 			yMin &= -clusterCount * 2;
62 			yMin += cluster2;
63 
64 			If(yMin < yMax)
65 			{
66 				rasterize(yMin, yMax);
67 			}
68 
69 			primitive += sizeof(Primitive) * state.multiSample;
70 			count--;
71 		}
72 		Until(count == 0)
73 
74 		if(state.occlusionEnabled)
75 		{
76 			UInt clusterOcclusion = *Pointer<UInt>(data + OFFSET(DrawData,occlusion) + 4 * cluster);
77 			clusterOcclusion += occlusion;
78 			*Pointer<UInt>(data + OFFSET(DrawData,occlusion) + 4 * cluster) = clusterOcclusion;
79 		}
80 
81 		#if PERF_PROFILE
82 			cycles[PERF_PIXEL] = Ticks() - pixelTime;
83 
84 			for(int i = 0; i < PERF_TIMERS; i++)
85 			{
86 				*Pointer<Long>(data + OFFSET(DrawData,cycles[i]) + 8 * cluster) += cycles[i];
87 			}
88 		#endif
89 
90 		Return();
91 	}
92 
rasterize(Int & yMin,Int & yMax)93 	void QuadRasterizer::rasterize(Int &yMin, Int &yMax)
94 	{
95 		Pointer<Byte> cBuffer[RENDERTARGETS];
96 		Pointer<Byte> zBuffer;
97 		Pointer<Byte> sBuffer;
98 
99 		for(int index = 0; index < RENDERTARGETS; index++)
100 		{
101 			if(state.colorWriteActive(index))
102 			{
103 				cBuffer[index] = *Pointer<Pointer<Byte>>(data + OFFSET(DrawData,colorBuffer[index])) + yMin * *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
104 			}
105 		}
106 
107 		if(state.depthTestActive)
108 		{
109 			zBuffer = *Pointer<Pointer<Byte>>(data + OFFSET(DrawData,depthBuffer)) + yMin * *Pointer<Int>(data + OFFSET(DrawData,depthPitchB));
110 		}
111 
112 		if(state.stencilActive)
113 		{
114 			sBuffer = *Pointer<Pointer<Byte>>(data + OFFSET(DrawData,stencilBuffer)) + yMin * *Pointer<Int>(data + OFFSET(DrawData,stencilPitchB));
115 		}
116 
117 		Int y = yMin;
118 
119 		Do
120 		{
121 			Int x0a = Int(*Pointer<Short>(primitive + OFFSET(Primitive,outline->left) + (y + 0) * sizeof(Primitive::Span)));
122 			Int x0b = Int(*Pointer<Short>(primitive + OFFSET(Primitive,outline->left) + (y + 1) * sizeof(Primitive::Span)));
123 			Int x0 = Min(x0a, x0b);
124 
125 			for(unsigned int q = 1; q < state.multiSample; q++)
126 			{
127 				x0a = Int(*Pointer<Short>(primitive + q * sizeof(Primitive) + OFFSET(Primitive,outline->left) + (y + 0) * sizeof(Primitive::Span)));
128 				x0b = Int(*Pointer<Short>(primitive + q * sizeof(Primitive) + OFFSET(Primitive,outline->left) + (y + 1) * sizeof(Primitive::Span)));
129 				x0 = Min(x0, Min(x0a, x0b));
130 			}
131 
132 			x0 &= 0xFFFFFFFE;
133 
134 			Int x1a = Int(*Pointer<Short>(primitive + OFFSET(Primitive,outline->right) + (y + 0) * sizeof(Primitive::Span)));
135 			Int x1b = Int(*Pointer<Short>(primitive + OFFSET(Primitive,outline->right) + (y + 1) * sizeof(Primitive::Span)));
136 			Int x1 = Max(x1a, x1b);
137 
138 			for(unsigned int q = 1; q < state.multiSample; q++)
139 			{
140 				x1a = Int(*Pointer<Short>(primitive + q * sizeof(Primitive) + OFFSET(Primitive,outline->right) + (y + 0) * sizeof(Primitive::Span)));
141 				x1b = Int(*Pointer<Short>(primitive + q * sizeof(Primitive) + OFFSET(Primitive,outline->right) + (y + 1) * sizeof(Primitive::Span)));
142 				x1 = Max(x1, Max(x1a, x1b));
143 			}
144 
145 			Float4 yyyy = Float4(Float(y)) + *Pointer<Float4>(primitive + OFFSET(Primitive,yQuad), 16);
146 
147 			if(interpolateZ())
148 			{
149 				for(unsigned int q = 0; q < state.multiSample; q++)
150 				{
151 					Float4 y = yyyy;
152 
153 					if(state.multiSample > 1)
154 					{
155 						y -= *Pointer<Float4>(constants + OFFSET(Constants,Y) + q * sizeof(float4));
156 					}
157 
158 					Dz[q] = *Pointer<Float4>(primitive + OFFSET(Primitive,z.C), 16) + y * *Pointer<Float4>(primitive + OFFSET(Primitive,z.B), 16);
159 				}
160 			}
161 
162 			if(veryEarlyDepthTest && state.multiSample == 1 && !state.depthOverride)
163 			{
164 				if(!state.stencilActive && state.depthTestActive && (state.depthCompareMode == DEPTH_LESSEQUAL || state.depthCompareMode == DEPTH_LESS))   // FIXME: Both modes ok?
165 				{
166 					Float4 xxxx = Float4(Float(x0)) + *Pointer<Float4>(primitive + OFFSET(Primitive,xQuad), 16);
167 
168 					Pointer<Byte> buffer;
169 					Int pitch;
170 
171 					if(!state.quadLayoutDepthBuffer)
172 					{
173 						buffer = zBuffer + 4 * x0;
174 						pitch = *Pointer<Int>(data + OFFSET(DrawData,depthPitchB));
175 					}
176 					else
177 					{
178 						buffer = zBuffer + 8 * x0;
179 					}
180 
181 					For(Int x = x0, x < x1, x += 2)
182 					{
183 						Float4 z = interpolate(xxxx, Dz[0], z, primitive + OFFSET(Primitive,z), false, false, state.depthClamp);
184 
185 						Float4 zValue;
186 
187 						if(!state.quadLayoutDepthBuffer)
188 						{
189 							// FIXME: Properly optimizes?
190 							zValue.xy = *Pointer<Float4>(buffer);
191 							zValue.zw = *Pointer<Float4>(buffer + pitch - 8);
192 						}
193 						else
194 						{
195 							zValue = *Pointer<Float4>(buffer, 16);
196 						}
197 
198 						Int4 zTest;
199 
200 						if(complementaryDepthBuffer)
201 						{
202 							zTest = CmpLE(zValue, z);
203 						}
204 						else
205 						{
206 							zTest = CmpNLT(zValue, z);
207 						}
208 
209 						Int zMask = SignMask(zTest);
210 
211 						If(zMask == 0)
212 						{
213 							x0 += 2;
214 						}
215 						Else
216 						{
217 							x = x1;
218 						}
219 
220 						xxxx += Float4(2);
221 
222 						if(!state.quadLayoutDepthBuffer)
223 						{
224 							buffer += 8;
225 						}
226 						else
227 						{
228 							buffer += 16;
229 						}
230 					}
231 				}
232 			}
233 
234 			If(x0 < x1)
235 			{
236 				if(interpolateW())
237 				{
238 					Dw = *Pointer<Float4>(primitive + OFFSET(Primitive,w.C), 16) + yyyy * *Pointer<Float4>(primitive + OFFSET(Primitive,w.B), 16);
239 				}
240 
241 				for(int interpolant = 0; interpolant < MAX_FRAGMENT_INPUTS; interpolant++)
242 				{
243 					for(int component = 0; component < 4; component++)
244 					{
245 						if(state.interpolant[interpolant].component & (1 << component))
246 						{
247 							Dv[interpolant][component] = *Pointer<Float4>(primitive + OFFSET(Primitive,V[interpolant][component].C), 16);
248 
249 							if(!(state.interpolant[interpolant].flat & (1 << component)))
250 							{
251 								Dv[interpolant][component] += yyyy * *Pointer<Float4>(primitive + OFFSET(Primitive,V[interpolant][component].B), 16);
252 							}
253 						}
254 					}
255 				}
256 
257 				if(state.fog.component)
258 				{
259 					Df = *Pointer<Float4>(primitive + OFFSET(Primitive,f.C), 16);
260 
261 					if(!state.fog.flat)
262 					{
263 						Df += yyyy * *Pointer<Float4>(primitive + OFFSET(Primitive,f.B), 16);
264 					}
265 				}
266 
267 				Short4 xLeft[4];
268 				Short4 xRight[4];
269 
270 				for(unsigned int q = 0; q < state.multiSample; q++)
271 				{
272 					xLeft[q] = *Pointer<Short4>(primitive + q * sizeof(Primitive) + OFFSET(Primitive,outline) + y * sizeof(Primitive::Span));
273 					xRight[q] = xLeft[q];
274 
275 					xLeft[q] = Swizzle(xLeft[q], 0xA0) - Short4(1, 2, 1, 2);
276 					xRight[q] = Swizzle(xRight[q], 0xF5) - Short4(0, 1, 0, 1);
277 				}
278 
279 				For(Int x = x0, x < x1, x += 2)
280 				{
281 					Short4 xxxx = Short4(x);
282 					Int cMask[4];
283 
284 					for(unsigned int q = 0; q < state.multiSample; q++)
285 					{
286 						Short4 mask = CmpGT(xxxx, xLeft[q]) & CmpGT(xRight[q], xxxx);
287 						cMask[q] = SignMask(PackSigned(mask, mask)) & 0x0000000F;
288 					}
289 
290 					quad(cBuffer, zBuffer, sBuffer, cMask, x, y);
291 				}
292 			}
293 
294 			int clusterCount = Renderer::getClusterCount();
295 
296 			for(int index = 0; index < RENDERTARGETS; index++)
297 			{
298 				if(state.colorWriteActive(index))
299 				{
300 					cBuffer[index] += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index])) << (1 + sw::log2(clusterCount));   // FIXME: Precompute
301 				}
302 			}
303 
304 			if(state.depthTestActive)
305 			{
306 				zBuffer += *Pointer<Int>(data + OFFSET(DrawData,depthPitchB)) << (1 + sw::log2(clusterCount));   // FIXME: Precompute
307 			}
308 
309 			if(state.stencilActive)
310 			{
311 				sBuffer += *Pointer<Int>(data + OFFSET(DrawData,stencilPitchB)) << (1 + sw::log2(clusterCount));   // FIXME: Precompute
312 			}
313 
314 			y += 2 * clusterCount;
315 		}
316 		Until(y >= yMax)
317 	}
318 
interpolate(Float4 & x,Float4 & D,Float4 & rhw,Pointer<Byte> planeEquation,bool flat,bool perspective,bool clamp)319 	Float4 QuadRasterizer::interpolate(Float4 &x, Float4 &D, Float4 &rhw, Pointer<Byte> planeEquation, bool flat, bool perspective, bool clamp)
320 	{
321 		Float4 interpolant = D;
322 
323 		if(!flat)
324 		{
325 			interpolant += x * *Pointer<Float4>(planeEquation + OFFSET(PlaneEquation, A), 16);
326 
327 			if(perspective)
328 			{
329 				interpolant *= rhw;
330 			}
331 		}
332 
333 		if(clamp)
334 		{
335 			interpolant = Min(Max(interpolant, Float4(0.0f)), Float4(1.0f));
336 		}
337 
338 		return interpolant;
339 	}
340 
interpolateZ() const341 	bool QuadRasterizer::interpolateZ() const
342 	{
343 		return state.depthTestActive || state.pixelFogActive() || (shader && shader->isVPosDeclared() && fullPixelPositionRegister);
344 	}
345 
interpolateW() const346 	bool QuadRasterizer::interpolateW() const
347 	{
348 		return state.perspective || (shader && shader->isVPosDeclared() && fullPixelPositionRegister);
349 	}
350 }
351