1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //    http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "Blitter.hpp"
16 
17 #include "Shader/ShaderCore.hpp"
18 #include "Reactor/Reactor.hpp"
19 #include "Common/Memory.hpp"
20 #include "Common/Debug.hpp"
21 
22 namespace sw
23 {
24 	using namespace rr;
25 
Blitter()26 	Blitter::Blitter()
27 	{
28 		blitCache = new RoutineCache<State>(1024);
29 	}
30 
	// Releases the routine cache that the constructor allocated with 'new'.
	Blitter::~Blitter()
	{
		delete blitCache;
	}
35 
clear(void * pixel,sw::Format format,Surface * dest,const SliceRect & dRect,unsigned int rgbaMask)36 	void Blitter::clear(void *pixel, sw::Format format, Surface *dest, const SliceRect &dRect, unsigned int rgbaMask)
37 	{
38 		if(fastClear(pixel, format, dest, dRect, rgbaMask))
39 		{
40 			return;
41 		}
42 
43 		sw::Surface *color = sw::Surface::create(1, 1, 1, format, pixel, sw::Surface::bytes(format), sw::Surface::bytes(format));
44 		SliceRectF sRect(0.5f, 0.5f, 0.5f, 0.5f, 0);   // Sample from the middle.
45 		blit(color, sRect, dest, dRect, {rgbaMask});
46 		delete color;
47 	}
48 
fastClear(void * pixel,sw::Format format,Surface * dest,const SliceRect & dRect,unsigned int rgbaMask)49 	bool Blitter::fastClear(void *pixel, sw::Format format, Surface *dest, const SliceRect &dRect, unsigned int rgbaMask)
50 	{
51 		if(format != FORMAT_A32B32G32R32F)
52 		{
53 			return false;
54 		}
55 
56 		float *color = (float*)pixel;
57 		float r = color[0];
58 		float g = color[1];
59 		float b = color[2];
60 		float a = color[3];
61 
62 		uint32_t packed;
63 
64 		switch(dest->getFormat())
65 		{
66 		case FORMAT_R5G6B5:
67 			if((rgbaMask & 0x7) != 0x7) return false;
68 			packed = ((uint16_t)(31 * b + 0.5f) << 0) |
69 			         ((uint16_t)(63 * g + 0.5f) << 5) |
70 			         ((uint16_t)(31 * r + 0.5f) << 11);
71 			break;
72 		case FORMAT_X8B8G8R8:
73 			if((rgbaMask & 0x7) != 0x7) return false;
74 			packed = ((uint32_t)(255) << 24) |
75 			         ((uint32_t)(255 * b + 0.5f) << 16) |
76 			         ((uint32_t)(255 * g + 0.5f) << 8) |
77 			         ((uint32_t)(255 * r + 0.5f) << 0);
78 			break;
79 		case FORMAT_A8B8G8R8:
80 			if((rgbaMask & 0xF) != 0xF) return false;
81 			packed = ((uint32_t)(255 * a + 0.5f) << 24) |
82 			         ((uint32_t)(255 * b + 0.5f) << 16) |
83 			         ((uint32_t)(255 * g + 0.5f) << 8) |
84 			         ((uint32_t)(255 * r + 0.5f) << 0);
85 			break;
86 		case FORMAT_X8R8G8B8:
87 			if((rgbaMask & 0x7) != 0x7) return false;
88 			packed = ((uint32_t)(255) << 24) |
89 			         ((uint32_t)(255 * r + 0.5f) << 16) |
90 			         ((uint32_t)(255 * g + 0.5f) << 8) |
91 			         ((uint32_t)(255 * b + 0.5f) << 0);
92 			break;
93 		case FORMAT_A8R8G8B8:
94 			if((rgbaMask & 0xF) != 0xF) return false;
95 			packed = ((uint32_t)(255 * a + 0.5f) << 24) |
96 			         ((uint32_t)(255 * r + 0.5f) << 16) |
97 			         ((uint32_t)(255 * g + 0.5f) << 8) |
98 			         ((uint32_t)(255 * b + 0.5f) << 0);
99 			break;
100 		default:
101 			return false;
102 		}
103 
104 		bool useDestInternal = !dest->isExternalDirty();
105 		uint8_t *slice = (uint8_t*)dest->lock(dRect.x0, dRect.y0, dRect.slice, sw::LOCK_WRITEONLY, sw::PUBLIC, useDestInternal);
106 
107 		for(int j = 0; j < dest->getSamples(); j++)
108 		{
109 			uint8_t *d = slice;
110 
111 			switch(Surface::bytes(dest->getFormat()))
112 			{
113 			case 2:
114 				for(int i = dRect.y0; i < dRect.y1; i++)
115 				{
116 					sw::clear((uint16_t*)d, packed, dRect.x1 - dRect.x0);
117 					d += dest->getPitchB(useDestInternal);
118 				}
119 				break;
120 			case 4:
121 				for(int i = dRect.y0; i < dRect.y1; i++)
122 				{
123 					sw::clear((uint32_t*)d, packed, dRect.x1 - dRect.x0);
124 					d += dest->getPitchB(useDestInternal);
125 				}
126 				break;
127 			default:
128 				assert(false);
129 			}
130 
131 			slice += dest->getSliceB(useDestInternal);
132 		}
133 
134 		dest->unlock(useDestInternal);
135 
136 		return true;
137 	}
138 
blit(Surface * source,const SliceRectF & sourceRect,Surface * dest,const SliceRect & destRect,const Blitter::Options & options)139 	void Blitter::blit(Surface *source, const SliceRectF &sourceRect, Surface *dest, const SliceRect &destRect, const Blitter::Options& options)
140 	{
141 		if(dest->getInternalFormat() == FORMAT_NULL)
142 		{
143 			return;
144 		}
145 
146 		if(blitReactor(source, sourceRect, dest, destRect, options))
147 		{
148 			return;
149 		}
150 
151 		SliceRectF sRect = sourceRect;
152 		SliceRect dRect = destRect;
153 
154 		bool flipX = destRect.x0 > destRect.x1;
155 		bool flipY = destRect.y0 > destRect.y1;
156 
157 		if(flipX)
158 		{
159 			swap(dRect.x0, dRect.x1);
160 			swap(sRect.x0, sRect.x1);
161 		}
162 		if(flipY)
163 		{
164 			swap(dRect.y0, dRect.y1);
165 			swap(sRect.y0, sRect.y1);
166 		}
167 
168 		source->lockInternal(0, 0, sRect.slice, sw::LOCK_READONLY, sw::PUBLIC);
169 		dest->lockInternal(0, 0, dRect.slice, sw::LOCK_WRITEONLY, sw::PUBLIC);
170 
171 		float w = sRect.width() / dRect.width();
172 		float h = sRect.height() / dRect.height();
173 
174 		float xStart = sRect.x0 + (0.5f - dRect.x0) * w;
175 		float yStart = sRect.y0 + (0.5f - dRect.y0) * h;
176 
177 		for(int j = dRect.y0; j < dRect.y1; j++)
178 		{
179 			float y = yStart + j * h;
180 
181 			for(int i = dRect.x0; i < dRect.x1; i++)
182 			{
183 				float x = xStart + i * w;
184 
185 				// FIXME: Support RGBA mask
186 				dest->copyInternal(source, i, j, x, y, options.filter);
187 			}
188 		}
189 
190 		source->unlockInternal();
191 		dest->unlockInternal();
192 	}
193 
blit3D(Surface * source,Surface * dest)194 	void Blitter::blit3D(Surface *source, Surface *dest)
195 	{
196 		source->lockInternal(0, 0, 0, sw::LOCK_READONLY, sw::PUBLIC);
197 		dest->lockInternal(0, 0, 0, sw::LOCK_WRITEONLY, sw::PUBLIC);
198 
199 		float w = static_cast<float>(source->getWidth())  / static_cast<float>(dest->getWidth());
200 		float h = static_cast<float>(source->getHeight()) / static_cast<float>(dest->getHeight());
201 		float d = static_cast<float>(source->getDepth())  / static_cast<float>(dest->getDepth());
202 
203 		for(int k = 0; k < dest->getDepth(); k++)
204 		{
205 			float z = (k + 0.5f) * d;
206 
207 			for(int j = 0; j < dest->getHeight(); j++)
208 			{
209 				float y = (j + 0.5f) * h;
210 
211 				for(int i = 0; i < dest->getWidth(); i++)
212 				{
213 					float x = (i + 0.5f) * w;
214 
215 					dest->copyInternal(source, i, j, k, x, y, z, true);
216 				}
217 			}
218 		}
219 
220 		source->unlockInternal();
221 		dest->unlockInternal();
222 	}
223 
	// Loads one element in state.sourceFormat from 'element' and expands it
	// into the four-channel floating-point color 'c' (x = red, y = green,
	// z = blue, w = alpha for color formats; x carries the value for depth and
	// stencil formats).
	//
	// c       - receives the color. It starts out as (0, 0, 0, 1), so channels
	//           a format does not provide keep those values unless the case
	//           overrides them.
	// element - address of the element to read.
	// state   - only state.sourceFormat is consulted here.
	//
	// Integer and normalized channels are converted to float without any
	// rescaling (a byte of 255 becomes 255.0f). For many integer/normalized
	// formats lacking alpha, w is set to the largest value of the channel type
	// (0xFF, 0x7F, 0xFFFF, ...) instead of 1.
	//
	// Returns false when the source format is not handled, true otherwise.
	bool Blitter::read(Float4 &c, Pointer<Byte> element, const State &state)
	{
		c = Float4(0.0f, 0.0f, 0.0f, 1.0f);

		switch(state.sourceFormat)
		{
		case FORMAT_L8:
			// Luminance is replicated into red, green and blue.
			c.xyz = Float(Int(*Pointer<Byte>(element)));
			c.w = float(0xFF);
			break;
		case FORMAT_A8:
			// Alpha only; the color channels keep their default of 0.
			c.w = Float(Int(*Pointer<Byte>(element)));
			break;
		case FORMAT_R8I:
		case FORMAT_R8_SNORM:
			c.x = Float(Int(*Pointer<SByte>(element)));
			c.w = float(0x7F);
			break;
		case FORMAT_R8:
		case FORMAT_R8UI:
			c.x = Float(Int(*Pointer<Byte>(element)));
			c.w = float(0xFF);
			break;
		case FORMAT_R16I:
			c.x = Float(Int(*Pointer<Short>(element)));
			c.w = float(0x7FFF);
			break;
		case FORMAT_R16UI:
			c.x = Float(Int(*Pointer<UShort>(element)));
			c.w = float(0xFFFF);
			break;
		case FORMAT_R32I:
			c.x = Float(*Pointer<Int>(element));
			c.w = float(0x7FFFFFFF);
			break;
		case FORMAT_R32UI:
			c.x = Float(*Pointer<UInt>(element));
			c.w = float(0xFFFFFFFF);
			break;
		case FORMAT_A8R8G8B8:
			// The .zyxw swizzle exchanges the first and third byte, turning the
			// stored B, G, R, A order into R, G, B, A.
			c = Float4(*Pointer<Byte4>(element)).zyxw;
			break;
		case FORMAT_A8B8G8R8I:
		case FORMAT_A8B8G8R8_SNORM:
			c = Float4(*Pointer<SByte4>(element));
			break;
		case FORMAT_A8B8G8R8:
		case FORMAT_A8B8G8R8UI:
		case FORMAT_SRGB8_A8:
			c = Float4(*Pointer<Byte4>(element));
			break;
		case FORMAT_X8R8G8B8:
			// Same byte order as A8R8G8B8; the fourth byte is ignored and alpha
			// is forced to its maximum.
			c = Float4(*Pointer<Byte4>(element)).zyxw;
			c.w = float(0xFF);
			break;
		case FORMAT_R8G8B8:
			// Byte 0 goes to blue, byte 2 to red.
			c.z = Float(Int(*Pointer<Byte>(element + 0)));
			c.y = Float(Int(*Pointer<Byte>(element + 1)));
			c.x = Float(Int(*Pointer<Byte>(element + 2)));
			c.w = float(0xFF);
			break;
		case FORMAT_B8G8R8:
			// Byte 0 goes to red, byte 2 to blue.
			c.x = Float(Int(*Pointer<Byte>(element + 0)));
			c.y = Float(Int(*Pointer<Byte>(element + 1)));
			c.z = Float(Int(*Pointer<Byte>(element + 2)));
			c.w = float(0xFF);
			break;
		case FORMAT_X8B8G8R8I:
		case FORMAT_X8B8G8R8_SNORM:
			c = Float4(*Pointer<SByte4>(element));
			c.w = float(0x7F);
			break;
		case FORMAT_X8B8G8R8:
		case FORMAT_X8B8G8R8UI:
		case FORMAT_SRGB8_X8:
			c = Float4(*Pointer<Byte4>(element));
			c.w = float(0xFF);
			break;
		case FORMAT_A16B16G16R16I:
			c = Float4(*Pointer<Short4>(element));
			break;
		case FORMAT_A16B16G16R16:
		case FORMAT_A16B16G16R16UI:
			c = Float4(*Pointer<UShort4>(element));
			break;
		case FORMAT_X16B16G16R16I:
			c = Float4(*Pointer<Short4>(element));
			c.w = float(0x7FFF);
			break;
		case FORMAT_X16B16G16R16UI:
			c = Float4(*Pointer<UShort4>(element));
			c.w = float(0xFFFF);
			break;
		case FORMAT_A32B32G32R32I:
			c = Float4(*Pointer<Int4>(element));
			break;
		case FORMAT_A32B32G32R32UI:
			c = Float4(*Pointer<UInt4>(element));
			break;
		case FORMAT_X32B32G32R32I:
			c = Float4(*Pointer<Int4>(element));
			c.w = float(0x7FFFFFFF);
			break;
		case FORMAT_X32B32G32R32UI:
			c = Float4(*Pointer<UInt4>(element));
			c.w = float(0xFFFFFFFF);
			break;
		case FORMAT_G8R8I:
		case FORMAT_G8R8_SNORM:
			c.x = Float(Int(*Pointer<SByte>(element + 0)));
			c.y = Float(Int(*Pointer<SByte>(element + 1)));
			c.w = float(0x7F);
			break;
		case FORMAT_G8R8:
		case FORMAT_G8R8UI:
			c.x = Float(Int(*Pointer<Byte>(element + 0)));
			c.y = Float(Int(*Pointer<Byte>(element + 1)));
			c.w = float(0xFF);
			break;
		case FORMAT_G16R16I:
			c.x = Float(Int(*Pointer<Short>(element + 0)));
			c.y = Float(Int(*Pointer<Short>(element + 2)));
			c.w = float(0x7FFF);
			break;
		case FORMAT_G16R16:
		case FORMAT_G16R16UI:
			c.x = Float(Int(*Pointer<UShort>(element + 0)));
			c.y = Float(Int(*Pointer<UShort>(element + 2)));
			c.w = float(0xFFFF);
			break;
		case FORMAT_G32R32I:
			c.x = Float(*Pointer<Int>(element + 0));
			c.y = Float(*Pointer<Int>(element + 4));
			c.w = float(0x7FFFFFFF);
			break;
		case FORMAT_G32R32UI:
			c.x = Float(*Pointer<UInt>(element + 0));
			c.y = Float(*Pointer<UInt>(element + 4));
			c.w = float(0xFFFFFFFF);
			break;
		case FORMAT_A32B32G32R32F:
			c = *Pointer<Float4>(element);
			break;
		case FORMAT_X32B32G32R32F:
		case FORMAT_X32B32G32R32F_UNSIGNED:
		case FORMAT_B32G32R32F:
			// Read blue here, then deliberately fall through to pick up red
			// and green. Alpha keeps its default of 1.0f.
			c.z = *Pointer<Float>(element + 8);
		case FORMAT_G32R32F:
			c.x = *Pointer<Float>(element + 0);
			c.y = *Pointer<Float>(element + 4);
			break;
		case FORMAT_R32F:
			c.x = *Pointer<Float>(element);
			break;
		case FORMAT_R5G6B5:
			// Red occupies bits 11-15, green bits 5-10, blue bits 0-4.
			// Alpha keeps its default of 1.0f.
			c.x = Float(Int((*Pointer<UShort>(element) & UShort(0xF800)) >> UShort(11)));
			c.y = Float(Int((*Pointer<UShort>(element) & UShort(0x07E0)) >> UShort(5)));
			c.z = Float(Int(*Pointer<UShort>(element) & UShort(0x001F)));
			break;
		case FORMAT_A2B10G10R10:
		case FORMAT_A2B10G10R10UI:
			// Red occupies bits 0-9, green 10-19, blue 20-29, alpha 30-31.
			c.x = Float(Int((*Pointer<UInt>(element) & UInt(0x000003FF))));
			c.y = Float(Int((*Pointer<UInt>(element) & UInt(0x000FFC00)) >> 10));
			c.z = Float(Int((*Pointer<UInt>(element) & UInt(0x3FF00000)) >> 20));
			c.w = Float(Int((*Pointer<UInt>(element) & UInt(0xC0000000)) >> 30));
			break;
		case FORMAT_D16:
			c.x = Float(Int((*Pointer<UShort>(element))));
			break;
		case FORMAT_D24S8:
		case FORMAT_D24X8:
			// Depth is taken from the upper 24 bits; the low byte is dropped.
			c.x = Float(Int((*Pointer<UInt>(element) & UInt(0xFFFFFF00)) >> 8));
			break;
		case FORMAT_D32:
			// NOTE(review): the value goes through a signed Int, so depths above
			// 0x7FFFFFFF presumably come out negative - confirm this is intended.
			c.x = Float(Int((*Pointer<UInt>(element))));
			break;
		case FORMAT_D32F_COMPLEMENTARY:
		case FORMAT_D32FS8_COMPLEMENTARY:
			// Stored as (1 - depth); convert back to plain depth.
			c.x = 1.0f - *Pointer<Float>(element);
			break;
		case FORMAT_D32F:
		case FORMAT_D32FS8:
		case FORMAT_D32F_LOCKABLE:
		case FORMAT_D32FS8_TEXTURE:
		case FORMAT_D32F_SHADOW:
		case FORMAT_D32FS8_SHADOW:
			c.x = *Pointer<Float>(element);
			break;
		case FORMAT_S8:
			c.x = Float(Int(*Pointer<Byte>(element)));
			break;
		default:
			return false;
		}

		return true;
	}
421 
write(Float4 & c,Pointer<Byte> element,const State & state)422 	bool Blitter::write(Float4 &c, Pointer<Byte> element, const State &state)
423 	{
424 		bool writeR = state.writeRed;
425 		bool writeG = state.writeGreen;
426 		bool writeB = state.writeBlue;
427 		bool writeA = state.writeAlpha;
428 		bool writeRGBA = writeR && writeG && writeB && writeA;
429 
430 		switch(state.destFormat)
431 		{
432 		case FORMAT_L8:
433 			*Pointer<Byte>(element) = Byte(RoundInt(Float(c.x)));
434 			break;
435 		case FORMAT_A8:
436 			if(writeA) { *Pointer<Byte>(element) = Byte(RoundInt(Float(c.w))); }
437 			break;
438 		case FORMAT_A8R8G8B8:
439 			if(writeRGBA)
440 			{
441 				Short4 c0 = RoundShort4(c.zyxw);
442 				*Pointer<Byte4>(element) = Byte4(PackUnsigned(c0, c0));
443 			}
444 			else
445 			{
446 				if(writeB) { *Pointer<Byte>(element + 0) = Byte(RoundInt(Float(c.z))); }
447 				if(writeG) { *Pointer<Byte>(element + 1) = Byte(RoundInt(Float(c.y))); }
448 				if(writeR) { *Pointer<Byte>(element + 2) = Byte(RoundInt(Float(c.x))); }
449 				if(writeA) { *Pointer<Byte>(element + 3) = Byte(RoundInt(Float(c.w))); }
450 			}
451 			break;
452 		case FORMAT_A8B8G8R8:
453 		case FORMAT_SRGB8_A8:
454 			if(writeRGBA)
455 			{
456 				Short4 c0 = RoundShort4(c);
457 				*Pointer<Byte4>(element) = Byte4(PackUnsigned(c0, c0));
458 			}
459 			else
460 			{
461 				if(writeR) { *Pointer<Byte>(element + 0) = Byte(RoundInt(Float(c.x))); }
462 				if(writeG) { *Pointer<Byte>(element + 1) = Byte(RoundInt(Float(c.y))); }
463 				if(writeB) { *Pointer<Byte>(element + 2) = Byte(RoundInt(Float(c.z))); }
464 				if(writeA) { *Pointer<Byte>(element + 3) = Byte(RoundInt(Float(c.w))); }
465 			}
466 			break;
467 		case FORMAT_X8R8G8B8:
468 			if(writeRGBA)
469 			{
470 				Short4 c0 = RoundShort4(c.zyxw) | Short4(0x0000, 0x0000, 0x0000, 0x00FF);
471 				*Pointer<Byte4>(element) = Byte4(PackUnsigned(c0, c0));
472 			}
473 			else
474 			{
475 				if(writeB) { *Pointer<Byte>(element + 0) = Byte(RoundInt(Float(c.z))); }
476 				if(writeG) { *Pointer<Byte>(element + 1) = Byte(RoundInt(Float(c.y))); }
477 				if(writeR) { *Pointer<Byte>(element + 2) = Byte(RoundInt(Float(c.x))); }
478 				if(writeA) { *Pointer<Byte>(element + 3) = Byte(0xFF); }
479 			}
480 			break;
481 		case FORMAT_X8B8G8R8:
482 		case FORMAT_SRGB8_X8:
483 			if(writeRGBA)
484 			{
485 				Short4 c0 = RoundShort4(c) | Short4(0x0000, 0x0000, 0x0000, 0x00FF);
486 				*Pointer<Byte4>(element) = Byte4(PackUnsigned(c0, c0));
487 			}
488 			else
489 			{
490 				if(writeR) { *Pointer<Byte>(element + 0) = Byte(RoundInt(Float(c.x))); }
491 				if(writeG) { *Pointer<Byte>(element + 1) = Byte(RoundInt(Float(c.y))); }
492 				if(writeB) { *Pointer<Byte>(element + 2) = Byte(RoundInt(Float(c.z))); }
493 				if(writeA) { *Pointer<Byte>(element + 3) = Byte(0xFF); }
494 			}
495 			break;
496 		case FORMAT_R8G8B8:
497 			if(writeR) { *Pointer<Byte>(element + 2) = Byte(RoundInt(Float(c.x))); }
498 			if(writeG) { *Pointer<Byte>(element + 1) = Byte(RoundInt(Float(c.y))); }
499 			if(writeB) { *Pointer<Byte>(element + 0) = Byte(RoundInt(Float(c.z))); }
500 			break;
501 		case FORMAT_B8G8R8:
502 			if(writeR) { *Pointer<Byte>(element + 0) = Byte(RoundInt(Float(c.x))); }
503 			if(writeG) { *Pointer<Byte>(element + 1) = Byte(RoundInt(Float(c.y))); }
504 			if(writeB) { *Pointer<Byte>(element + 2) = Byte(RoundInt(Float(c.z))); }
505 			break;
506 		case FORMAT_A32B32G32R32F:
507 			if(writeRGBA)
508 			{
509 				*Pointer<Float4>(element) = c;
510 			}
511 			else
512 			{
513 				if(writeR) { *Pointer<Float>(element) = c.x; }
514 				if(writeG) { *Pointer<Float>(element + 4) = c.y; }
515 				if(writeB) { *Pointer<Float>(element + 8) = c.z; }
516 				if(writeA) { *Pointer<Float>(element + 12) = c.w; }
517 			}
518 			break;
519 		case FORMAT_X32B32G32R32F:
520 		case FORMAT_X32B32G32R32F_UNSIGNED:
521 			if(writeA) { *Pointer<Float>(element + 12) = 1.0f; }
522 		case FORMAT_B32G32R32F:
523 			if(writeR) { *Pointer<Float>(element) = c.x; }
524 			if(writeG) { *Pointer<Float>(element + 4) = c.y; }
525 			if(writeB) { *Pointer<Float>(element + 8) = c.z; }
526 			break;
527 		case FORMAT_G32R32F:
528 			if(writeR && writeG)
529 			{
530 				*Pointer<Float2>(element) = Float2(c);
531 			}
532 			else
533 			{
534 				if(writeR) { *Pointer<Float>(element) = c.x; }
535 				if(writeG) { *Pointer<Float>(element + 4) = c.y; }
536 			}
537 			break;
538 		case FORMAT_R32F:
539 			if(writeR) { *Pointer<Float>(element) = c.x; }
540 			break;
541 		case FORMAT_A8B8G8R8I:
542 		case FORMAT_A8B8G8R8_SNORM:
543 			if(writeA) { *Pointer<SByte>(element + 3) = SByte(RoundInt(Float(c.w))); }
544 		case FORMAT_X8B8G8R8I:
545 		case FORMAT_X8B8G8R8_SNORM:
546 			if(writeA && (state.destFormat == FORMAT_X8B8G8R8I || state.destFormat == FORMAT_X8B8G8R8_SNORM))
547 			{
548 				*Pointer<SByte>(element + 3) = SByte(0x7F);
549 			}
550 			if(writeB) { *Pointer<SByte>(element + 2) = SByte(RoundInt(Float(c.z))); }
551 		case FORMAT_G8R8I:
552 		case FORMAT_G8R8_SNORM:
553 			if(writeG) { *Pointer<SByte>(element + 1) = SByte(RoundInt(Float(c.y))); }
554 		case FORMAT_R8I:
555 		case FORMAT_R8_SNORM:
556 			if(writeR) { *Pointer<SByte>(element) = SByte(RoundInt(Float(c.x))); }
557 			break;
558 		case FORMAT_A8B8G8R8UI:
559 			if(writeA) { *Pointer<Byte>(element + 3) = Byte(RoundInt(Float(c.w))); }
560 		case FORMAT_X8B8G8R8UI:
561 			if(writeA && (state.destFormat == FORMAT_X8B8G8R8UI))
562 			{
563 				*Pointer<Byte>(element + 3) = Byte(0xFF);
564 			}
565 			if(writeB) { *Pointer<Byte>(element + 2) = Byte(RoundInt(Float(c.z))); }
566 		case FORMAT_G8R8UI:
567 		case FORMAT_G8R8:
568 			if(writeG) { *Pointer<Byte>(element + 1) = Byte(RoundInt(Float(c.y))); }
569 		case FORMAT_R8UI:
570 		case FORMAT_R8:
571 			if(writeR) { *Pointer<Byte>(element) = Byte(RoundInt(Float(c.x))); }
572 			break;
573 		case FORMAT_A16B16G16R16I:
574 			if(writeRGBA)
575 			{
576 				*Pointer<Short4>(element) = Short4(RoundInt(c));
577 			}
578 			else
579 			{
580 				if(writeR) { *Pointer<Short>(element) = Short(RoundInt(Float(c.x))); }
581 				if(writeG) { *Pointer<Short>(element + 2) = Short(RoundInt(Float(c.y))); }
582 				if(writeB) { *Pointer<Short>(element + 4) = Short(RoundInt(Float(c.z))); }
583 				if(writeA) { *Pointer<Short>(element + 6) = Short(RoundInt(Float(c.w))); }
584 			}
585 			break;
586 		case FORMAT_X16B16G16R16I:
587 			if(writeRGBA)
588 			{
589 				*Pointer<Short4>(element) = Short4(RoundInt(c));
590 			}
591 			else
592 			{
593 				if(writeR) { *Pointer<Short>(element) = Short(RoundInt(Float(c.x))); }
594 				if(writeG) { *Pointer<Short>(element + 2) = Short(RoundInt(Float(c.y))); }
595 				if(writeB) { *Pointer<Short>(element + 4) = Short(RoundInt(Float(c.z))); }
596 			}
597 			if(writeA) { *Pointer<Short>(element + 6) = Short(0x7F); }
598 			break;
599 		case FORMAT_G16R16I:
600 			if(writeR && writeG)
601 			{
602 				*Pointer<Short2>(element) = Short2(Short4(RoundInt(c)));
603 			}
604 			else
605 			{
606 				if(writeR) { *Pointer<Short>(element) = Short(RoundInt(Float(c.x))); }
607 				if(writeG) { *Pointer<Short>(element + 2) = Short(RoundInt(Float(c.y))); }
608 			}
609 			break;
610 		case FORMAT_R16I:
611 			if(writeR) { *Pointer<Short>(element) = Short(RoundInt(Float(c.x))); }
612 			break;
613 		case FORMAT_A16B16G16R16UI:
614 		case FORMAT_A16B16G16R16:
615 			if(writeRGBA)
616 			{
617 				*Pointer<UShort4>(element) = UShort4(RoundInt(c));
618 			}
619 			else
620 			{
621 				if(writeR) { *Pointer<UShort>(element) = UShort(RoundInt(Float(c.x))); }
622 				if(writeG) { *Pointer<UShort>(element + 2) = UShort(RoundInt(Float(c.y))); }
623 				if(writeB) { *Pointer<UShort>(element + 4) = UShort(RoundInt(Float(c.z))); }
624 				if(writeA) { *Pointer<UShort>(element + 6) = UShort(RoundInt(Float(c.w))); }
625 			}
626 			break;
627 		case FORMAT_X16B16G16R16UI:
628 			if(writeRGBA)
629 			{
630 				*Pointer<UShort4>(element) = UShort4(RoundInt(c));
631 			}
632 			else
633 			{
634 				if(writeR) { *Pointer<UShort>(element) = UShort(RoundInt(Float(c.x))); }
635 				if(writeG) { *Pointer<UShort>(element + 2) = UShort(RoundInt(Float(c.y))); }
636 				if(writeB) { *Pointer<UShort>(element + 4) = UShort(RoundInt(Float(c.z))); }
637 			}
638 			if(writeA) { *Pointer<UShort>(element + 6) = UShort(0xFF); }
639 			break;
640 		case FORMAT_G16R16UI:
641 		case FORMAT_G16R16:
642 			if(writeR && writeG)
643 			{
644 				*Pointer<UShort2>(element) = UShort2(UShort4(RoundInt(c)));
645 			}
646 			else
647 			{
648 				if(writeR) { *Pointer<UShort>(element) = UShort(RoundInt(Float(c.x))); }
649 				if(writeG) { *Pointer<UShort>(element + 2) = UShort(RoundInt(Float(c.y))); }
650 			}
651 			break;
652 		case FORMAT_R16UI:
653 			if(writeR) { *Pointer<UShort>(element) = UShort(RoundInt(Float(c.x))); }
654 			break;
655 		case FORMAT_A32B32G32R32I:
656 			if(writeRGBA)
657 			{
658 				*Pointer<Int4>(element) = RoundInt(c);
659 			}
660 			else
661 			{
662 				if(writeR) { *Pointer<Int>(element) = RoundInt(Float(c.x)); }
663 				if(writeG) { *Pointer<Int>(element + 4) = RoundInt(Float(c.y)); }
664 				if(writeB) { *Pointer<Int>(element + 8) = RoundInt(Float(c.z)); }
665 				if(writeA) { *Pointer<Int>(element + 12) = RoundInt(Float(c.w)); }
666 			}
667 			break;
668 		case FORMAT_X32B32G32R32I:
669 			if(writeRGBA)
670 			{
671 				*Pointer<Int4>(element) = RoundInt(c);
672 			}
673 			else
674 			{
675 				if(writeR) { *Pointer<Int>(element) = RoundInt(Float(c.x)); }
676 				if(writeG) { *Pointer<Int>(element + 4) = RoundInt(Float(c.y)); }
677 				if(writeB) { *Pointer<Int>(element + 8) = RoundInt(Float(c.z)); }
678 			}
679 			if(writeA) { *Pointer<Int>(element + 12) = Int(0x7FFFFFFF); }
680 			break;
681 		case FORMAT_G32R32I:
682 			if(writeG) { *Pointer<Int>(element + 4) = RoundInt(Float(c.y)); }
683 		case FORMAT_R32I:
684 			if(writeR) { *Pointer<Int>(element) = RoundInt(Float(c.x)); }
685 			break;
686 		case FORMAT_A32B32G32R32UI:
687 			if(writeRGBA)
688 			{
689 				*Pointer<UInt4>(element) = UInt4(RoundInt(c));
690 			}
691 			else
692 			{
693 				if(writeR) { *Pointer<UInt>(element) = As<UInt>(RoundInt(Float(c.x))); }
694 				if(writeG) { *Pointer<UInt>(element + 4) = As<UInt>(RoundInt(Float(c.y))); }
695 				if(writeB) { *Pointer<UInt>(element + 8) = As<UInt>(RoundInt(Float(c.z))); }
696 				if(writeA) { *Pointer<UInt>(element + 12) = As<UInt>(RoundInt(Float(c.w))); }
697 			}
698 			break;
699 		case FORMAT_X32B32G32R32UI:
700 			if(writeRGBA)
701 			{
702 				*Pointer<UInt4>(element) = UInt4(RoundInt(c));
703 			}
704 			else
705 			{
706 				if(writeR) { *Pointer<UInt>(element) = As<UInt>(RoundInt(Float(c.x))); }
707 				if(writeG) { *Pointer<UInt>(element + 4) = As<UInt>(RoundInt(Float(c.y))); }
708 				if(writeB) { *Pointer<UInt>(element + 8) = As<UInt>(RoundInt(Float(c.z))); }
709 			}
710 			if(writeA) { *Pointer<UInt4>(element + 12) = UInt4(0xFFFFFFFF); }
711 			break;
712 		case FORMAT_G32R32UI:
713 			if(writeG) { *Pointer<UInt>(element + 4) = As<UInt>(RoundInt(Float(c.y))); }
714 		case FORMAT_R32UI:
715 			if(writeR) { *Pointer<UInt>(element) = As<UInt>(RoundInt(Float(c.x))); }
716 			break;
717 		case FORMAT_R5G6B5:
718 			if(writeR && writeG && writeB)
719 			{
720 				*Pointer<UShort>(element) = UShort(RoundInt(Float(c.z)) |
721 				                                  (RoundInt(Float(c.y)) << Int(5)) |
722 				                                  (RoundInt(Float(c.x)) << Int(11)));
723 			}
724 			else
725 			{
726 				unsigned short mask = (writeB ? 0x001F : 0x0000) | (writeG ? 0x07E0 : 0x0000) | (writeR ? 0xF800 : 0x0000);
727 				unsigned short unmask = ~mask;
728 				*Pointer<UShort>(element) = (*Pointer<UShort>(element) & UShort(unmask)) |
729 				                            (UShort(RoundInt(Float(c.z)) |
730 				                                   (RoundInt(Float(c.y)) << Int(5)) |
731 				                                   (RoundInt(Float(c.x)) << Int(11))) & UShort(mask));
732 			}
733 			break;
734 		case FORMAT_A2B10G10R10:
735 		case FORMAT_A2B10G10R10UI:
736 			if(writeRGBA)
737 			{
738 				*Pointer<UInt>(element) = UInt(RoundInt(Float(c.x)) |
739 				                              (RoundInt(Float(c.y)) << 10) |
740 				                              (RoundInt(Float(c.z)) << 20) |
741 				                              (RoundInt(Float(c.w)) << 30));
742 			}
743 			else
744 			{
745 				unsigned int mask = (writeA ? 0xC0000000 : 0x0000) |
746 				                    (writeB ? 0x3FF00000 : 0x0000) |
747 				                    (writeG ? 0x000FFC00 : 0x0000) |
748 				                    (writeR ? 0x000003FF : 0x0000);
749 				unsigned int unmask = ~mask;
750 				*Pointer<UInt>(element) = (*Pointer<UInt>(element) & UInt(unmask)) |
751 				                            (UInt(RoundInt(Float(c.x)) |
752 				                                  (RoundInt(Float(c.y)) << 10) |
753 				                                  (RoundInt(Float(c.z)) << 20) |
754 				                                  (RoundInt(Float(c.w)) << 30)) & UInt(mask));
755 			}
756 			break;
757 		case FORMAT_D16:
758 			*Pointer<UShort>(element) = UShort(RoundInt(Float(c.x)));
759 			break;
760 		case FORMAT_D24S8:
761 		case FORMAT_D24X8:
762 			*Pointer<UInt>(element) = UInt(RoundInt(Float(c.x)) << 8);
763 			break;
764 		case FORMAT_D32:
765 			*Pointer<UInt>(element) = UInt(RoundInt(Float(c.x)));
766 			break;
767 		case FORMAT_D32F_COMPLEMENTARY:
768 		case FORMAT_D32FS8_COMPLEMENTARY:
769 			*Pointer<Float>(element) = 1.0f - c.x;
770 			break;
771 		case FORMAT_D32F:
772 		case FORMAT_D32FS8:
773 		case FORMAT_D32F_LOCKABLE:
774 		case FORMAT_D32FS8_TEXTURE:
775 		case FORMAT_D32F_SHADOW:
776 		case FORMAT_D32FS8_SHADOW:
777 			*Pointer<Float>(element) = c.x;
778 			break;
779 		case FORMAT_S8:
780 			*Pointer<Byte>(element) = Byte(RoundInt(Float(c.x)));
781 			break;
782 		default:
783 			return false;
784 		}
785 		return true;
786 	}
787 
read(Int4 & c,Pointer<Byte> element,const State & state)788 	bool Blitter::read(Int4 &c, Pointer<Byte> element, const State &state)
789 	{
790 		c = Int4(0, 0, 0, 1);
791 
792 		switch(state.sourceFormat)
793 		{
794 		case FORMAT_A8B8G8R8I:
795 			c = Insert(c, Int(*Pointer<SByte>(element + 3)), 3);
796 		case FORMAT_X8B8G8R8I:
797 			c = Insert(c, Int(*Pointer<SByte>(element + 2)), 2);
798 		case FORMAT_G8R8I:
799 			c = Insert(c, Int(*Pointer<SByte>(element + 1)), 1);
800 		case FORMAT_R8I:
801 			c = Insert(c, Int(*Pointer<SByte>(element)), 0);
802 			break;
803 		case FORMAT_A8B8G8R8UI:
804 			c = Insert(c, Int(*Pointer<Byte>(element + 3)), 3);
805 		case FORMAT_X8B8G8R8UI:
806 			c = Insert(c, Int(*Pointer<Byte>(element + 2)), 2);
807 		case FORMAT_G8R8UI:
808 			c = Insert(c, Int(*Pointer<Byte>(element + 1)), 1);
809 		case FORMAT_R8UI:
810 			c = Insert(c, Int(*Pointer<Byte>(element)), 0);
811 			break;
812 		case FORMAT_A16B16G16R16I:
813 			c = Insert(c, Int(*Pointer<Short>(element + 6)), 3);
814 		case FORMAT_X16B16G16R16I:
815 			c = Insert(c, Int(*Pointer<Short>(element + 4)), 2);
816 		case FORMAT_G16R16I:
817 			c = Insert(c, Int(*Pointer<Short>(element + 2)), 1);
818 		case FORMAT_R16I:
819 			c = Insert(c, Int(*Pointer<Short>(element)), 0);
820 			break;
821 		case FORMAT_A16B16G16R16UI:
822 			c = Insert(c, Int(*Pointer<UShort>(element + 6)), 3);
823 		case FORMAT_X16B16G16R16UI:
824 			c = Insert(c, Int(*Pointer<UShort>(element + 4)), 2);
825 		case FORMAT_G16R16UI:
826 			c = Insert(c, Int(*Pointer<UShort>(element + 2)), 1);
827 		case FORMAT_R16UI:
828 			c = Insert(c, Int(*Pointer<UShort>(element)), 0);
829 			break;
830 		case FORMAT_A32B32G32R32I:
831 		case FORMAT_A32B32G32R32UI:
832 			c = *Pointer<Int4>(element);
833 			break;
834 		case FORMAT_X32B32G32R32I:
835 		case FORMAT_X32B32G32R32UI:
836 			c = Insert(c, *Pointer<Int>(element + 8), 2);
837 		case FORMAT_G32R32I:
838 		case FORMAT_G32R32UI:
839 			c = Insert(c, *Pointer<Int>(element + 4), 1);
840 		case FORMAT_R32I:
841 		case FORMAT_R32UI:
842 			c = Insert(c, *Pointer<Int>(element), 0);
843 			break;
844 		default:
845 			return false;
846 		}
847 
848 		return true;
849 	}
850 
write(Int4 & c,Pointer<Byte> element,const State & state)851 	bool Blitter::write(Int4 &c, Pointer<Byte> element, const State &state)
852 	{
853 		bool writeR = state.writeRed;
854 		bool writeG = state.writeGreen;
855 		bool writeB = state.writeBlue;
856 		bool writeA = state.writeAlpha;
857 		bool writeRGBA = writeR && writeG && writeB && writeA;
858 
859 		switch(state.destFormat)
860 		{
861 		case FORMAT_A8B8G8R8I:
862 			if(writeA) { *Pointer<SByte>(element + 3) = SByte(Extract(c, 3)); }
863 		case FORMAT_X8B8G8R8I:
864 			if(writeA && (state.destFormat != FORMAT_A8B8G8R8I))
865 			{
866 				*Pointer<SByte>(element + 3) = SByte(0x7F);
867 			}
868 			if(writeB) { *Pointer<SByte>(element + 2) = SByte(Extract(c, 2)); }
869 		case FORMAT_G8R8I:
870 			if(writeG) { *Pointer<SByte>(element + 1) = SByte(Extract(c, 1)); }
871 		case FORMAT_R8I:
872 			if(writeR) { *Pointer<SByte>(element) = SByte(Extract(c, 0)); }
873 			break;
874 		case FORMAT_A8B8G8R8UI:
875 			if(writeA) { *Pointer<Byte>(element + 3) = Byte(Extract(c, 3)); }
876 		case FORMAT_X8B8G8R8UI:
877 			if(writeA && (state.destFormat != FORMAT_A8B8G8R8UI))
878 			{
879 				*Pointer<Byte>(element + 3) = Byte(0xFF);
880 			}
881 			if(writeB) { *Pointer<Byte>(element + 2) = Byte(Extract(c, 2)); }
882 		case FORMAT_G8R8UI:
883 			if(writeG) { *Pointer<Byte>(element + 1) = Byte(Extract(c, 1)); }
884 		case FORMAT_R8UI:
885 			if(writeR) { *Pointer<Byte>(element) = Byte(Extract(c, 0)); }
886 			break;
887 		case FORMAT_A16B16G16R16I:
888 			if(writeA) { *Pointer<Short>(element + 6) = Short(Extract(c, 3)); }
889 		case FORMAT_X16B16G16R16I:
890 			if(writeA && (state.destFormat != FORMAT_A16B16G16R16I))
891 			{
892 				*Pointer<Short>(element + 6) = Short(0x7FFF);
893 			}
894 			if(writeB) { *Pointer<Short>(element + 4) = Short(Extract(c, 2)); }
895 		case FORMAT_G16R16I:
896 			if(writeG) { *Pointer<Short>(element + 2) = Short(Extract(c, 1)); }
897 		case FORMAT_R16I:
898 			if(writeR) { *Pointer<Short>(element) = Short(Extract(c, 0)); }
899 			break;
900 		case FORMAT_A16B16G16R16UI:
901 			if(writeA) { *Pointer<UShort>(element + 6) = UShort(Extract(c, 3)); }
902 		case FORMAT_X16B16G16R16UI:
903 			if(writeA && (state.destFormat != FORMAT_A16B16G16R16UI))
904 			{
905 				*Pointer<UShort>(element + 6) = UShort(0xFFFF);
906 			}
907 			if(writeB) { *Pointer<UShort>(element + 4) = UShort(Extract(c, 2)); }
908 		case FORMAT_G16R16UI:
909 			if(writeG) { *Pointer<UShort>(element + 2) = UShort(Extract(c, 1)); }
910 		case FORMAT_R16UI:
911 			if(writeR) { *Pointer<UShort>(element) = UShort(Extract(c, 0)); }
912 			break;
913 		case FORMAT_A32B32G32R32I:
914 			if(writeRGBA)
915 			{
916 				*Pointer<Int4>(element) = c;
917 			}
918 			else
919 			{
920 				if(writeR) { *Pointer<Int>(element) = Extract(c, 0); }
921 				if(writeG) { *Pointer<Int>(element + 4) = Extract(c, 1); }
922 				if(writeB) { *Pointer<Int>(element + 8) = Extract(c, 2); }
923 				if(writeA) { *Pointer<Int>(element + 12) = Extract(c, 3); }
924 			}
925 			break;
926 		case FORMAT_X32B32G32R32I:
927 			if(writeRGBA)
928 			{
929 				*Pointer<Int4>(element) = c;
930 			}
931 			else
932 			{
933 				if(writeR) { *Pointer<Int>(element) = Extract(c, 0); }
934 				if(writeG) { *Pointer<Int>(element + 4) = Extract(c, 1); }
935 				if(writeB) { *Pointer<Int>(element + 8) = Extract(c, 2); }
936 			}
937 			if(writeA) { *Pointer<Int>(element + 12) = Int(0x7FFFFFFF); }
938 			break;
939 		case FORMAT_G32R32I:
940 			if(writeR) { *Pointer<Int>(element) = Extract(c, 0); }
941 			if(writeG) { *Pointer<Int>(element + 4) = Extract(c, 1); }
942 			break;
943 		case FORMAT_R32I:
944 			if(writeR) { *Pointer<Int>(element) = Extract(c, 0); }
945 			break;
946 		case FORMAT_A32B32G32R32UI:
947 			if(writeRGBA)
948 			{
949 				*Pointer<UInt4>(element) = As<UInt4>(c);
950 			}
951 			else
952 			{
953 				if(writeR) { *Pointer<UInt>(element) = As<UInt>(Extract(c, 0)); }
954 				if(writeG) { *Pointer<UInt>(element + 4) = As<UInt>(Extract(c, 1)); }
955 				if(writeB) { *Pointer<UInt>(element + 8) = As<UInt>(Extract(c, 2)); }
956 				if(writeA) { *Pointer<UInt>(element + 12) = As<UInt>(Extract(c, 3)); }
957 			}
958 			break;
959 		case FORMAT_X32B32G32R32UI:
960 			if(writeRGBA)
961 			{
962 				*Pointer<UInt4>(element) = As<UInt4>(c);
963 			}
964 			else
965 			{
966 				if(writeR) { *Pointer<UInt>(element) = As<UInt>(Extract(c, 0)); }
967 				if(writeG) { *Pointer<UInt>(element + 4) = As<UInt>(Extract(c, 1)); }
968 				if(writeB) { *Pointer<UInt>(element + 8) = As<UInt>(Extract(c, 2)); }
969 			}
970 			if(writeA) { *Pointer<UInt>(element + 3) = UInt(0xFFFFFFFF); }
971 			break;
972 		case FORMAT_G32R32UI:
973 			if(writeR) { *Pointer<UInt>(element) = As<UInt>(Extract(c, 0)); }
974 			if(writeG) { *Pointer<UInt>(element + 4) = As<UInt>(Extract(c, 1)); }
975 			break;
976 		case FORMAT_R32UI:
977 			if(writeR) { *Pointer<UInt>(element) = As<UInt>(Extract(c, 0)); }
978 			break;
979 		default:
980 			return false;
981 		}
982 
983 		return true;
984 	}
985 
GetScale(float4 & scale,Format format)986 	bool Blitter::GetScale(float4 &scale, Format format)
987 	{
988 		switch(format)
989 		{
990 		case FORMAT_L8:
991 		case FORMAT_A8:
992 		case FORMAT_A8R8G8B8:
993 		case FORMAT_X8R8G8B8:
994 		case FORMAT_R8:
995 		case FORMAT_G8R8:
996 		case FORMAT_R8G8B8:
997 		case FORMAT_B8G8R8:
998 		case FORMAT_X8B8G8R8:
999 		case FORMAT_A8B8G8R8:
1000 		case FORMAT_SRGB8_X8:
1001 		case FORMAT_SRGB8_A8:
1002 			scale = vector(0xFF, 0xFF, 0xFF, 0xFF);
1003 			break;
1004 		case FORMAT_R8_SNORM:
1005 		case FORMAT_G8R8_SNORM:
1006 		case FORMAT_X8B8G8R8_SNORM:
1007 		case FORMAT_A8B8G8R8_SNORM:
1008 			scale = vector(0x7F, 0x7F, 0x7F, 0x7F);
1009 			break;
1010 		case FORMAT_A16B16G16R16:
1011 			scale = vector(0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF);
1012 			break;
1013 		case FORMAT_R8I:
1014 		case FORMAT_R8UI:
1015 		case FORMAT_G8R8I:
1016 		case FORMAT_G8R8UI:
1017 		case FORMAT_X8B8G8R8I:
1018 		case FORMAT_X8B8G8R8UI:
1019 		case FORMAT_A8B8G8R8I:
1020 		case FORMAT_A8B8G8R8UI:
1021 		case FORMAT_R16I:
1022 		case FORMAT_R16UI:
1023 		case FORMAT_G16R16:
1024 		case FORMAT_G16R16I:
1025 		case FORMAT_G16R16UI:
1026 		case FORMAT_X16B16G16R16I:
1027 		case FORMAT_X16B16G16R16UI:
1028 		case FORMAT_A16B16G16R16I:
1029 		case FORMAT_A16B16G16R16UI:
1030 		case FORMAT_R32I:
1031 		case FORMAT_R32UI:
1032 		case FORMAT_G32R32I:
1033 		case FORMAT_G32R32UI:
1034 		case FORMAT_X32B32G32R32I:
1035 		case FORMAT_X32B32G32R32UI:
1036 		case FORMAT_A32B32G32R32I:
1037 		case FORMAT_A32B32G32R32UI:
1038 		case FORMAT_A32B32G32R32F:
1039 		case FORMAT_X32B32G32R32F:
1040 		case FORMAT_X32B32G32R32F_UNSIGNED:
1041 		case FORMAT_B32G32R32F:
1042 		case FORMAT_G32R32F:
1043 		case FORMAT_R32F:
1044 		case FORMAT_A2B10G10R10UI:
1045 			scale = vector(1.0f, 1.0f, 1.0f, 1.0f);
1046 			break;
1047 		case FORMAT_R5G6B5:
1048 			scale = vector(0x1F, 0x3F, 0x1F, 1.0f);
1049 			break;
1050 		case FORMAT_A2B10G10R10:
1051 			scale = vector(0x3FF, 0x3FF, 0x3FF, 0x03);
1052 			break;
1053 		case FORMAT_D16:
1054 			scale = vector(0xFFFF, 0.0f, 0.0f, 0.0f);
1055 			break;
1056 		case FORMAT_D24S8:
1057 		case FORMAT_D24X8:
1058 			scale = vector(0xFFFFFF, 0.0f, 0.0f, 0.0f);
1059 			break;
1060 		case FORMAT_D32:
1061 			scale = vector(static_cast<float>(0xFFFFFFFF), 0.0f, 0.0f, 0.0f);
1062 			break;
1063 		case FORMAT_D32F:
1064 		case FORMAT_D32FS8:
1065 		case FORMAT_D32F_COMPLEMENTARY:
1066 		case FORMAT_D32FS8_COMPLEMENTARY:
1067 		case FORMAT_D32F_LOCKABLE:
1068 		case FORMAT_D32FS8_TEXTURE:
1069 		case FORMAT_D32F_SHADOW:
1070 		case FORMAT_D32FS8_SHADOW:
1071 		case FORMAT_S8:
1072 			scale = vector(1.0f, 1.0f, 1.0f, 1.0f);
1073 			break;
1074 		default:
1075 			return false;
1076 		}
1077 
1078 		return true;
1079 	}
1080 
ApplyScaleAndClamp(Float4 & value,const State & state,bool preScaled)1081 	bool Blitter::ApplyScaleAndClamp(Float4 &value, const State &state, bool preScaled)
1082 	{
1083 		float4 scale, unscale;
1084 		if(state.clearOperation &&
1085 		   Surface::isNonNormalizedInteger(state.sourceFormat) &&
1086 		   !Surface::isNonNormalizedInteger(state.destFormat))
1087 		{
1088 			// If we're clearing a buffer from an int or uint color into a normalized color,
1089 			// then the whole range of the int or uint color must be scaled between 0 and 1.
1090 			switch(state.sourceFormat)
1091 			{
1092 			case FORMAT_A32B32G32R32I:
1093 				unscale = replicate(static_cast<float>(0x7FFFFFFF));
1094 				break;
1095 			case FORMAT_A32B32G32R32UI:
1096 				unscale = replicate(static_cast<float>(0xFFFFFFFF));
1097 				break;
1098 			default:
1099 				return false;
1100 			}
1101 		}
1102 		else if(!GetScale(unscale, state.sourceFormat))
1103 		{
1104 			return false;
1105 		}
1106 
1107 		if(!GetScale(scale, state.destFormat))
1108 		{
1109 			return false;
1110 		}
1111 
1112 		bool srcSRGB = Surface::isSRGBformat(state.sourceFormat);
1113 		bool dstSRGB = Surface::isSRGBformat(state.destFormat);
1114 
1115 		if(state.convertSRGB && ((srcSRGB && !preScaled) || dstSRGB))   // One of the formats is sRGB encoded.
1116 		{
1117 			value *= preScaled ? Float4(1.0f / scale.x, 1.0f / scale.y, 1.0f / scale.z, 1.0f / scale.w) : // Unapply scale
1118 			                     Float4(1.0f / unscale.x, 1.0f / unscale.y, 1.0f / unscale.z, 1.0f / unscale.w); // Apply unscale
1119 			value = (srcSRGB && !preScaled) ? sRGBtoLinear(value) : LinearToSRGB(value);
1120 			value *= Float4(scale.x, scale.y, scale.z, scale.w); // Apply scale
1121 		}
1122 		else if(unscale != scale)
1123 		{
1124 			value *= Float4(scale.x / unscale.x, scale.y / unscale.y, scale.z / unscale.z, scale.w / unscale.w);
1125 		}
1126 
1127 		if(state.destFormat == FORMAT_X32B32G32R32F_UNSIGNED)
1128 		{
1129 			value = Max(value, Float4(0.0f));  // TODO: Only necessary if source is signed.
1130 		}
1131 		else if(Surface::isFloatFormat(state.sourceFormat) && !Surface::isFloatFormat(state.destFormat))
1132 		{
1133 			value = Min(value, Float4(scale.x, scale.y, scale.z, scale.w));
1134 
1135 			value = Max(value, Float4(Surface::isUnsignedComponent(state.destFormat, 0) ? 0.0f : -scale.x,
1136 			                          Surface::isUnsignedComponent(state.destFormat, 1) ? 0.0f : -scale.y,
1137 			                          Surface::isUnsignedComponent(state.destFormat, 2) ? 0.0f : -scale.z,
1138 			                          Surface::isUnsignedComponent(state.destFormat, 3) ? 0.0f : -scale.w));
1139 		}
1140 
1141 		return true;
1142 	}
1143 
ComputeOffset(Int & x,Int & y,Int & pitchB,int bytes,bool quadLayout)1144 	Int Blitter::ComputeOffset(Int &x, Int &y, Int &pitchB, int bytes, bool quadLayout)
1145 	{
1146 		if(!quadLayout)
1147 		{
1148 			return y * pitchB + x * bytes;
1149 		}
1150 		else
1151 		{
1152 			// (x & ~1) * 2 + (x & 1) == (x - (x & 1)) * 2 + (x & 1) == x * 2 - (x & 1) * 2 + (x & 1) == x * 2 - (x & 1)
1153 			return (y & Int(~1)) * pitchB +
1154 			       ((y & Int(1)) * 2 + x * 2 - (x & Int(1))) * bytes;
1155 		}
1156 	}
1157 
LinearToSRGB(Float4 & c)1158 	Float4 Blitter::LinearToSRGB(Float4 &c)
1159 	{
1160 		Float4 lc = Min(c, Float4(0.0031308f)) * Float4(12.92f);
1161 		Float4 ec = Float4(1.055f) * power(c, Float4(1.0f / 2.4f)) - Float4(0.055f);
1162 
1163 		Float4 s = c;
1164 		s.xyz = Max(lc, ec);
1165 
1166 		return s;
1167 	}
1168 
sRGBtoLinear(Float4 & c)1169 	Float4 Blitter::sRGBtoLinear(Float4 &c)
1170 	{
1171 		Float4 lc = c * Float4(1.0f / 12.92f);
1172 		Float4 ec = power((c + Float4(0.055f)) * Float4(1.0f / 1.055f), Float4(2.4f));
1173 
1174 		Int4 linear = CmpLT(c, Float4(0.04045f));
1175 
1176 		Float4 s = c;
1177 		s.xyz = As<Float4>((linear & As<Int4>(lc)) | (~linear & As<Int4>(ec)));   // FIXME: IfThenElse()
1178 
1179 		return s;
1180 	}
1181 
generate(const State & state)1182 	Routine *Blitter::generate(const State &state)
1183 	{
1184 		Function<Void(Pointer<Byte>)> function;
1185 		{
1186 			Pointer<Byte> blit(function.Arg<0>());
1187 
1188 			Pointer<Byte> source = *Pointer<Pointer<Byte>>(blit + OFFSET(BlitData,source));
1189 			Pointer<Byte> dest = *Pointer<Pointer<Byte>>(blit + OFFSET(BlitData,dest));
1190 			Int sPitchB = *Pointer<Int>(blit + OFFSET(BlitData,sPitchB));
1191 			Int dPitchB = *Pointer<Int>(blit + OFFSET(BlitData,dPitchB));
1192 
1193 			Float x0 = *Pointer<Float>(blit + OFFSET(BlitData,x0));
1194 			Float y0 = *Pointer<Float>(blit + OFFSET(BlitData,y0));
1195 			Float w = *Pointer<Float>(blit + OFFSET(BlitData,w));
1196 			Float h = *Pointer<Float>(blit + OFFSET(BlitData,h));
1197 
1198 			Int x0d = *Pointer<Int>(blit + OFFSET(BlitData,x0d));
1199 			Int x1d = *Pointer<Int>(blit + OFFSET(BlitData,x1d));
1200 			Int y0d = *Pointer<Int>(blit + OFFSET(BlitData,y0d));
1201 			Int y1d = *Pointer<Int>(blit + OFFSET(BlitData,y1d));
1202 
1203 			Int sWidth = *Pointer<Int>(blit + OFFSET(BlitData,sWidth));
1204 			Int sHeight = *Pointer<Int>(blit + OFFSET(BlitData,sHeight));
1205 
1206 			bool intSrc = Surface::isNonNormalizedInteger(state.sourceFormat);
1207 			bool intDst = Surface::isNonNormalizedInteger(state.destFormat);
1208 			bool intBoth = intSrc && intDst;
1209 			bool srcQuadLayout = Surface::hasQuadLayout(state.sourceFormat);
1210 			bool dstQuadLayout = Surface::hasQuadLayout(state.destFormat);
1211 			int srcBytes = Surface::bytes(state.sourceFormat);
1212 			int dstBytes = Surface::bytes(state.destFormat);
1213 
1214 			bool hasConstantColorI = false;
1215 			Int4 constantColorI;
1216 			bool hasConstantColorF = false;
1217 			Float4 constantColorF;
1218 			if(state.clearOperation)
1219 			{
1220 				if(intBoth) // Integer types
1221 				{
1222 					if(!read(constantColorI, source, state))
1223 					{
1224 						return nullptr;
1225 					}
1226 					hasConstantColorI = true;
1227 				}
1228 				else
1229 				{
1230 					if(!read(constantColorF, source, state))
1231 					{
1232 						return nullptr;
1233 					}
1234 					hasConstantColorF = true;
1235 
1236 					if(!ApplyScaleAndClamp(constantColorF, state))
1237 					{
1238 						return nullptr;
1239 					}
1240 				}
1241 			}
1242 
1243 			For(Int j = y0d, j < y1d, j++)
1244 			{
1245 				Float y = state.clearOperation ? RValue<Float>(y0) : y0 + Float(j) * h;
1246 				Pointer<Byte> destLine = dest + (dstQuadLayout ? j & Int(~1) : RValue<Int>(j)) * dPitchB;
1247 
1248 				For(Int i = x0d, i < x1d, i++)
1249 				{
1250 					Float x = state.clearOperation ? RValue<Float>(x0) : x0 + Float(i) * w;
1251 					Pointer<Byte> d = destLine + (dstQuadLayout ? (((j & Int(1)) << 1) + (i * 2) - (i & Int(1))) : RValue<Int>(i)) * dstBytes;
1252 
1253 					if(hasConstantColorI)
1254 					{
1255 						if(!write(constantColorI, d, state))
1256 						{
1257 							return nullptr;
1258 						}
1259 					}
1260 					else if(hasConstantColorF)
1261 					{
1262 						for(int s = 0; s < state.destSamples; s++)
1263 						{
1264 							if(!write(constantColorF, d, state))
1265 							{
1266 								return nullptr;
1267 							}
1268 
1269 							d += *Pointer<Int>(blit + OFFSET(BlitData, dSliceB));
1270 						}
1271 					}
1272 					else if(intBoth) // Integer types do not support filtering
1273 					{
1274 						Int4 color; // When both formats are true integer types, we don't go to float to avoid losing precision
1275 						Int X = Int(x);
1276 						Int Y = Int(y);
1277 
1278 						if(state.clampToEdge)
1279 						{
1280 							X = Clamp(X, 0, sWidth - 1);
1281 							Y = Clamp(Y, 0, sHeight - 1);
1282 						}
1283 
1284 						Pointer<Byte> s = source + ComputeOffset(X, Y, sPitchB, srcBytes, srcQuadLayout);
1285 
1286 						if(!read(color, s, state))
1287 						{
1288 							return nullptr;
1289 						}
1290 
1291 						if(!write(color, d, state))
1292 						{
1293 							return nullptr;
1294 						}
1295 					}
1296 					else
1297 					{
1298 						Float4 color;
1299 
1300 						bool preScaled = false;
1301 						if(!state.filter || intSrc)
1302 						{
1303 							Int X = Int(x);
1304 							Int Y = Int(y);
1305 
1306 							if(state.clampToEdge)
1307 							{
1308 								X = Clamp(X, 0, sWidth - 1);
1309 								Y = Clamp(Y, 0, sHeight - 1);
1310 							}
1311 
1312 							Pointer<Byte> s = source + ComputeOffset(X, Y, sPitchB, srcBytes, srcQuadLayout);
1313 
1314 							if(!read(color, s, state))
1315 							{
1316 								return nullptr;
1317 							}
1318 						}
1319 						else   // Bilinear filtering
1320 						{
1321 							Float X = x;
1322 							Float Y = y;
1323 
1324 							if(state.clampToEdge)
1325 							{
1326 								X = Min(Max(x, 0.5f), Float(sWidth) - 0.5f);
1327 								Y = Min(Max(y, 0.5f), Float(sHeight) - 0.5f);
1328 							}
1329 
1330 							Float x0 = X - 0.5f;
1331 							Float y0 = Y - 0.5f;
1332 
1333 							Int X0 = Max(Int(x0), 0);
1334 							Int Y0 = Max(Int(y0), 0);
1335 
1336 							Int X1 = X0 + 1;
1337 							Int Y1 = Y0 + 1;
1338 							X1 = IfThenElse(X1 >= sWidth, X0, X1);
1339 							Y1 = IfThenElse(Y1 >= sHeight, Y0, Y1);
1340 
1341 							Pointer<Byte> s00 = source + ComputeOffset(X0, Y0, sPitchB, srcBytes, srcQuadLayout);
1342 							Pointer<Byte> s01 = source + ComputeOffset(X1, Y0, sPitchB, srcBytes, srcQuadLayout);
1343 							Pointer<Byte> s10 = source + ComputeOffset(X0, Y1, sPitchB, srcBytes, srcQuadLayout);
1344 							Pointer<Byte> s11 = source + ComputeOffset(X1, Y1, sPitchB, srcBytes, srcQuadLayout);
1345 
1346 							Float4 c00; if(!read(c00, s00, state)) return nullptr;
1347 							Float4 c01; if(!read(c01, s01, state)) return nullptr;
1348 							Float4 c10; if(!read(c10, s10, state)) return nullptr;
1349 							Float4 c11; if(!read(c11, s11, state)) return nullptr;
1350 
1351 							if(state.convertSRGB && Surface::isSRGBformat(state.sourceFormat)) // sRGB -> RGB
1352 							{
1353 								if(!ApplyScaleAndClamp(c00, state)) return nullptr;
1354 								if(!ApplyScaleAndClamp(c01, state)) return nullptr;
1355 								if(!ApplyScaleAndClamp(c10, state)) return nullptr;
1356 								if(!ApplyScaleAndClamp(c11, state)) return nullptr;
1357 								preScaled = true;
1358 							}
1359 
1360 							Float4 fx = Float4(x0 - Float(X0));
1361 							Float4 fy = Float4(y0 - Float(Y0));
1362 							Float4 ix = Float4(1.0f) - fx;
1363 							Float4 iy = Float4(1.0f) - fy;
1364 
1365 							color = (c00 * ix + c01 * fx) * iy +
1366 							        (c10 * ix + c11 * fx) * fy;
1367 						}
1368 
1369 						if(!ApplyScaleAndClamp(color, state, preScaled))
1370 						{
1371 							return nullptr;
1372 						}
1373 
1374 						for(int s = 0; s < state.destSamples; s++)
1375 						{
1376 							if(!write(color, d, state))
1377 							{
1378 								return nullptr;
1379 							}
1380 
1381 							d += *Pointer<Int>(blit + OFFSET(BlitData,dSliceB));
1382 						}
1383 					}
1384 				}
1385 			}
1386 		}
1387 
1388 		return function("BlitRoutine");
1389 	}
1390 
blitReactor(Surface * source,const SliceRectF & sourceRect,Surface * dest,const SliceRect & destRect,const Blitter::Options & options)1391 	bool Blitter::blitReactor(Surface *source, const SliceRectF &sourceRect, Surface *dest, const SliceRect &destRect, const Blitter::Options &options)
1392 	{
1393 		ASSERT(!options.clearOperation || ((source->getWidth() == 1) && (source->getHeight() == 1) && (source->getDepth() == 1)));
1394 
1395 		Rect dRect = destRect;
1396 		RectF sRect = sourceRect;
1397 		if(destRect.x0 > destRect.x1)
1398 		{
1399 			swap(dRect.x0, dRect.x1);
1400 			swap(sRect.x0, sRect.x1);
1401 		}
1402 		if(destRect.y0 > destRect.y1)
1403 		{
1404 			swap(dRect.y0, dRect.y1);
1405 			swap(sRect.y0, sRect.y1);
1406 		}
1407 
1408 		State state(options);
1409 		state.clampToEdge = (sourceRect.x0 < 0.0f) ||
1410 		                    (sourceRect.y0 < 0.0f) ||
1411 		                    (sourceRect.x1 > (float)source->getWidth()) ||
1412 		                    (sourceRect.y1 > (float)source->getHeight());
1413 
1414 		bool useSourceInternal = !source->isExternalDirty();
1415 		bool useDestInternal = !dest->isExternalDirty();
1416 		bool isStencil = options.useStencil;
1417 
1418 		state.sourceFormat = isStencil ? source->getStencilFormat() : source->getFormat(useSourceInternal);
1419 		state.destFormat = isStencil ? dest->getStencilFormat() : dest->getFormat(useDestInternal);
1420 		state.destSamples = dest->getSamples();
1421 
1422 		criticalSection.lock();
1423 		Routine *blitRoutine = blitCache->query(state);
1424 
1425 		if(!blitRoutine)
1426 		{
1427 			blitRoutine = generate(state);
1428 
1429 			if(!blitRoutine)
1430 			{
1431 				criticalSection.unlock();
1432 				return false;
1433 			}
1434 
1435 			blitCache->add(state, blitRoutine);
1436 		}
1437 
1438 		criticalSection.unlock();
1439 
1440 		void (*blitFunction)(const BlitData *data) = (void(*)(const BlitData*))blitRoutine->getEntry();
1441 
1442 		BlitData data;
1443 
1444 		bool isRGBA = options.writeMask == 0xF;
1445 		bool isEntireDest = dest->isEntire(destRect);
1446 
1447 		data.source = isStencil ? source->lockStencil(0, 0, 0, sw::PUBLIC) :
1448 		                          source->lock(0, 0, sourceRect.slice, sw::LOCK_READONLY, sw::PUBLIC, useSourceInternal);
1449 		data.dest = isStencil ? dest->lockStencil(0, 0, 0, sw::PUBLIC) :
1450 		                        dest->lock(0, 0, destRect.slice, isRGBA ? (isEntireDest ? sw::LOCK_DISCARD : sw::LOCK_WRITEONLY) : sw::LOCK_READWRITE, sw::PUBLIC, useDestInternal);
1451 		data.sPitchB = isStencil ? source->getStencilPitchB() : source->getPitchB(useSourceInternal);
1452 		data.dPitchB = isStencil ? dest->getStencilPitchB() : dest->getPitchB(useDestInternal);
1453 		data.dSliceB = isStencil ? dest->getStencilSliceB() : dest->getSliceB(useDestInternal);
1454 
1455 		data.w = sRect.width() / dRect.width();
1456 		data.h = sRect.height() / dRect.height();
1457 		data.x0 = sRect.x0 + (0.5f - dRect.x0) * data.w;
1458 		data.y0 = sRect.y0 + (0.5f - dRect.y0) * data.h;
1459 
1460 		data.x0d = dRect.x0;
1461 		data.x1d = dRect.x1;
1462 		data.y0d = dRect.y0;
1463 		data.y1d = dRect.y1;
1464 
1465 		data.sWidth = source->getWidth();
1466 		data.sHeight = source->getHeight();
1467 
1468 		blitFunction(&data);
1469 
1470 		if(isStencil)
1471 		{
1472 			source->unlockStencil();
1473 			dest->unlockStencil();
1474 		}
1475 		else
1476 		{
1477 			source->unlock(useSourceInternal);
1478 			dest->unlock(useDestInternal);
1479 		}
1480 
1481 		return true;
1482 	}
1483 }
1484