1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //    http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "Blitter.hpp"
16 
17 #include "Common/Debug.hpp"
18 #include "Reactor/Reactor.hpp"
19 
20 namespace sw
21 {
	// Namespace-scope Blitter instance, constructed during static initialization.
	Blitter blitter;
23 
Blitter()24 	Blitter::Blitter()
25 	{
26 		blitCache = new RoutineCache<BlitState>(1024);
27 	}
28 
~Blitter()29 	Blitter::~Blitter()
30 	{
31 		delete blitCache;
32 	}
33 
clear(void * pixel,sw::Format format,Surface * dest,const SliceRect & dRect,unsigned int rgbaMask)34 	void Blitter::clear(void* pixel, sw::Format format, Surface *dest, const SliceRect &dRect, unsigned int rgbaMask)
35 	{
36 		sw::Surface color(1, 1, 1, format, pixel, sw::Surface::bytes(format), sw::Surface::bytes(format));
37 		Blitter::Options clearOptions = static_cast<sw::Blitter::Options>((rgbaMask & 0xF) | CLEAR_OPERATION);
38 		SliceRect sRect(dRect);
39 		sRect.slice = 0;
40 		blit(&color, sRect, dest, dRect, clearOptions);
41 	}
42 
blit(Surface * source,const SliceRect & sRect,Surface * dest,const SliceRect & dRect,bool filter)43 	void Blitter::blit(Surface *source, const SliceRect &sRect, Surface *dest, const SliceRect &dRect, bool filter)
44 	{
45 		Blitter::Options options = filter ? static_cast<Blitter::Options>(WRITE_RGBA | FILTER_LINEAR) : WRITE_RGBA;
46 		blit(source, sRect, dest, dRect, options);
47 	}
48 
blit(Surface * source,const SliceRect & sourceRect,Surface * dest,const SliceRect & destRect,const Blitter::Options & options)49 	void Blitter::blit(Surface *source, const SliceRect &sourceRect, Surface *dest, const SliceRect &destRect, const Blitter::Options& options)
50 	{
51 		if(dest->getInternalFormat() == FORMAT_NULL)
52 		{
53 			return;
54 		}
55 
56 		if(blitReactor(source, sourceRect, dest, destRect, options))
57 		{
58 			return;
59 		}
60 
61 		SliceRect sRect = sourceRect;
62 		SliceRect dRect = destRect;
63 
64 		bool flipX = destRect.x0 > destRect.x1;
65 		bool flipY = destRect.y0 > destRect.y1;
66 
67 		if(flipX)
68 		{
69 			swap(dRect.x0, dRect.x1);
70 			swap(sRect.x0, sRect.x1);
71 		}
72 		if(flipY)
73 		{
74 			swap(dRect.y0, dRect.y1);
75 			swap(sRect.y0, sRect.y1);
76 		}
77 
78 		source->lockInternal(sRect.x0, sRect.y0, sRect.slice, sw::LOCK_READONLY, sw::PUBLIC);
79 		dest->lockInternal(dRect.x0, dRect.y0, dRect.slice, sw::LOCK_WRITEONLY, sw::PUBLIC);
80 
81 		float w = static_cast<float>(sRect.x1 - sRect.x0) / static_cast<float>(dRect.x1 - dRect.x0);
82 		float h = static_cast<float>(sRect.y1 - sRect.y0) / static_cast<float>(dRect.y1 - dRect.y0);
83 
84 		const float xStart = (float)sRect.x0 + 0.5f * w;
85 		float y = (float)sRect.y0 + 0.5f * h;
86 
87 		for(int j = dRect.y0; j < dRect.y1; j++)
88 		{
89 			float x = xStart;
90 
91 			for(int i = dRect.x0; i < dRect.x1; i++)
92 			{
93 				// FIXME: Support RGBA mask
94 				dest->copyInternal(source, i, j, x, y, (options & FILTER_LINEAR) == FILTER_LINEAR);
95 
96 				x += w;
97 			}
98 
99 			y += h;
100 		}
101 
102 		source->unlockInternal();
103 		dest->unlockInternal();
104 	}
105 
blit3D(Surface * source,Surface * dest)106 	void Blitter::blit3D(Surface *source, Surface *dest)
107 	{
108 		source->lockInternal(0, 0, 0, sw::LOCK_READONLY, sw::PUBLIC);
109 		dest->lockInternal(0, 0, 0, sw::LOCK_WRITEONLY, sw::PUBLIC);
110 
111 		float w = static_cast<float>(source->getWidth())  / static_cast<float>(dest->getWidth());
112 		float h = static_cast<float>(source->getHeight()) / static_cast<float>(dest->getHeight());
113 		float d = static_cast<float>(source->getDepth())  / static_cast<float>(dest->getDepth());
114 
115 		float z = 0.5f * d;
116 		for(int k = 0; k < dest->getDepth(); ++k)
117 		{
118 			float y = 0.5f * h;
119 			for(int j = 0; j < dest->getHeight(); ++j)
120 			{
121 				float x = 0.5f * w;
122 				for(int i = 0; i < dest->getWidth(); ++i)
123 				{
124 					dest->copyInternal(source, i, j, k, x, y, z, true);
125 					x += w;
126 				}
127 				y += h;
128 			}
129 			z += d;
130 		}
131 
132 		source->unlockInternal();
133 		dest->unlockInternal();
134 	}
135 
	// Emits code which loads one texel of 'format' from 'element' into 'c' as floats.
	//
	// 'c' starts out as (0, 0, 0, 1) and only the components stored by the format are
	// overwritten. Integer-backed formats are loaded as raw integer values converted to
	// float (no normalization is applied here). For many integer formats without a
	// stored alpha channel, w is set to the maximum value of the component type
	// (e.g. 0xFF, 0x7FFF); otherwise it keeps the initial 1.0f.
	//
	// Returns false when 'format' is not handled by this function, true otherwise.
	bool Blitter::read(Float4 &c, Pointer<Byte> element, Format format)
	{
		c = Float4(0.0f, 0.0f, 0.0f, 1.0f);

		switch(format)
		{
		case FORMAT_L8:
			// Luminance is replicated into x, y and z.
			c.xyz = Float(Int(*Pointer<Byte>(element)));
			c.w = float(0xFF);
			break;
		case FORMAT_A8:
			c.w = Float(Int(*Pointer<Byte>(element)));
			break;
		case FORMAT_R8I:
		case FORMAT_R8I_SNORM:
			c.x = Float(Int(*Pointer<SByte>(element)));
			c.w = float(0x7F);
			break;
		case FORMAT_R8:
		case FORMAT_R8UI:
			c.x = Float(Int(*Pointer<Byte>(element)));
			c.w = float(0xFF);
			break;
		case FORMAT_R16I:
			c.x = Float(Int(*Pointer<Short>(element)));
			c.w = float(0x7FFF);
			break;
		case FORMAT_R16UI:
			c.x = Float(Int(*Pointer<UShort>(element)));
			c.w = float(0xFFFF);
			break;
		case FORMAT_R32I:
			c.x = Float(Int(*Pointer<Int>(element)));
			c.w = float(0x7FFFFFFF);
			break;
		case FORMAT_R32UI:
			c.x = Float(Int(*Pointer<UInt>(element)));
			c.w = float(0xFFFFFFFF);
			break;
		case FORMAT_A8R8G8B8:
			// Bytes are stored B, G, R, A; swizzle so that x holds red.
			c = Float4(*Pointer<Byte4>(element)).zyxw;
			break;
		case FORMAT_A8B8G8R8I:
		case FORMAT_A8B8G8R8I_SNORM:
			c = Float4(*Pointer<SByte4>(element));
			break;
		case FORMAT_A8B8G8R8:
		case FORMAT_A8B8G8R8UI:
		case FORMAT_SRGB8_A8:
			c = Float4(*Pointer<Byte4>(element));
			break;
		case FORMAT_X8R8G8B8:
			c = Float4(*Pointer<Byte4>(element)).zyxw;
			c.w = float(0xFF);
			break;
		case FORMAT_R8G8B8:
			// Byte 0 goes to z and byte 2 to x.
			c.z = Float(Int(*Pointer<Byte>(element + 0)));
			c.y = Float(Int(*Pointer<Byte>(element + 1)));
			c.x = Float(Int(*Pointer<Byte>(element + 2)));
			c.w = float(0xFF);
			break;
		case FORMAT_B8G8R8:
			c.x = Float(Int(*Pointer<Byte>(element + 0)));
			c.y = Float(Int(*Pointer<Byte>(element + 1)));
			c.z = Float(Int(*Pointer<Byte>(element + 2)));
			c.w = float(0xFF);
			break;
		case FORMAT_X8B8G8R8I:
		case FORMAT_X8B8G8R8I_SNORM:
			c = Float4(*Pointer<SByte4>(element));
			c.w = float(0x7F);
			break;
		case FORMAT_X8B8G8R8:
		case FORMAT_X8B8G8R8UI:
		case FORMAT_SRGB8_X8:
			c = Float4(*Pointer<Byte4>(element));
			c.w = float(0xFF);
			break;
		case FORMAT_A16B16G16R16I:
			c = Float4(*Pointer<Short4>(element));
			break;
		case FORMAT_A16B16G16R16:
		case FORMAT_A16B16G16R16UI:
			c = Float4(*Pointer<UShort4>(element));
			break;
		case FORMAT_X16B16G16R16I:
			c = Float4(*Pointer<Short4>(element));
			c.w = float(0x7FFF);
			break;
		case FORMAT_X16B16G16R16UI:
			c = Float4(*Pointer<UShort4>(element));
			c.w = float(0xFFFF);
			break;
		case FORMAT_A32B32G32R32I:
			c = Float4(*Pointer<Int4>(element));
			break;
		case FORMAT_A32B32G32R32UI:
			c = Float4(*Pointer<UInt4>(element));
			break;
		case FORMAT_X32B32G32R32I:
			c = Float4(*Pointer<Int4>(element));
			c.w = float(0x7FFFFFFF);
			break;
		case FORMAT_X32B32G32R32UI:
			c = Float4(*Pointer<UInt4>(element));
			c.w = float(0xFFFFFFFF);
			break;
		case FORMAT_G8R8I:
		case FORMAT_G8R8I_SNORM:
			c.x = Float(Int(*Pointer<SByte>(element + 0)));
			c.y = Float(Int(*Pointer<SByte>(element + 1)));
			c.w = float(0x7F);
			break;
		case FORMAT_G8R8:
		case FORMAT_G8R8UI:
			c.x = Float(Int(*Pointer<Byte>(element + 0)));
			c.y = Float(Int(*Pointer<Byte>(element + 1)));
			c.w = float(0xFF);
			break;
		case FORMAT_G16R16I:
			c.x = Float(Int(*Pointer<Short>(element + 0)));
			c.y = Float(Int(*Pointer<Short>(element + 2)));
			c.w = float(0x7FFF);
			break;
		case FORMAT_G16R16:
		case FORMAT_G16R16UI:
			c.x = Float(Int(*Pointer<UShort>(element + 0)));
			c.y = Float(Int(*Pointer<UShort>(element + 2)));
			c.w = float(0xFFFF);
			break;
		case FORMAT_G32R32I:
			c.x = Float(Int(*Pointer<Int>(element + 0)));
			c.y = Float(Int(*Pointer<Int>(element + 4)));
			c.w = float(0x7FFFFFFF);
			break;
		case FORMAT_G32R32UI:
			c.x = Float(Int(*Pointer<UInt>(element + 0)));
			c.y = Float(Int(*Pointer<UInt>(element + 4)));
			c.w = float(0xFFFFFFFF);
			break;
		case FORMAT_A32B32G32R32F:
			c = *Pointer<Float4>(element);
			break;
		case FORMAT_X32B32G32R32F:
		case FORMAT_B32G32R32F:
			c.z = *Pointer<Float>(element + 8);
			// Falls through to load x and y; w keeps its initial 1.0f.
		case FORMAT_G32R32F:
			c.x = *Pointer<Float>(element + 0);
			c.y = *Pointer<Float>(element + 4);
			break;
		case FORMAT_R32F:
			c.x = *Pointer<Float>(element);
			break;
		case FORMAT_R5G6B5:
			// Red is in bits 11-15, green in bits 5-10, blue in bits 0-4.
			c.x = Float(Int((*Pointer<UShort>(element) & UShort(0xF800)) >> UShort(11)));
			c.y = Float(Int((*Pointer<UShort>(element) & UShort(0x07E0)) >> UShort(5)));
			c.z = Float(Int(*Pointer<UShort>(element) & UShort(0x001F)));
			break;
		case FORMAT_A2B10G10R10:
			// Red is in bits 0-9, green in 10-19, blue in 20-29, alpha in 30-31.
			c.x = Float(Int((*Pointer<UInt>(element) & UInt(0x000003FF))));
			c.y = Float(Int((*Pointer<UInt>(element) & UInt(0x000FFC00)) >> 10));
			c.z = Float(Int((*Pointer<UInt>(element) & UInt(0x3FF00000)) >> 20));
			c.w = Float(Int((*Pointer<UInt>(element) & UInt(0xC0000000)) >> 30));
			break;
		case FORMAT_D16:
			c.x = Float(Int((*Pointer<UShort>(element))));
			break;
		case FORMAT_D24S8:
			// The whole 32-bit word is loaded into x; no bits are masked off here.
			c.x = Float(Int((*Pointer<UInt>(element))));
			break;
		case FORMAT_D32:
			c.x = Float(Int((*Pointer<UInt>(element))));
			break;
		case FORMAT_D32F:
			c.x = *Pointer<Float>(element);
			break;
		case FORMAT_D32F_COMPLEMENTARY:
			// The stored value is subtracted from 1.0f.
			c.x = 1.0f - *Pointer<Float>(element);
			break;
		case FORMAT_D32F_LOCKABLE:
			c.x = *Pointer<Float>(element);
			break;
		case FORMAT_D32FS8_TEXTURE:
			c.x = *Pointer<Float>(element);
			break;
		case FORMAT_D32FS8_SHADOW:
			c.x = *Pointer<Float>(element);
			break;
		default:
			return false;
		}

		return true;
	}
330 
write(Float4 & c,Pointer<Byte> element,Format format,const Blitter::Options & options)331 	bool Blitter::write(Float4 &c, Pointer<Byte> element, Format format, const Blitter::Options& options)
332 	{
333 		bool writeR = (options & WRITE_RED) == WRITE_RED;
334 		bool writeG = (options & WRITE_GREEN) == WRITE_GREEN;
335 		bool writeB = (options & WRITE_BLUE) == WRITE_BLUE;
336 		bool writeA = (options & WRITE_ALPHA) == WRITE_ALPHA;
337 		bool writeRGBA = writeR && writeG && writeB && writeA;
338 
339 		switch(format)
340 		{
341 		case FORMAT_L8:
342 			*Pointer<Byte>(element) = Byte(RoundInt(Float(c.x)));
343 			break;
344 		case FORMAT_A8:
345 			if(writeA) { *Pointer<Byte>(element) = Byte(RoundInt(Float(c.w))); }
346 			break;
347 		case FORMAT_A8R8G8B8:
348 			if(writeRGBA)
349 			{
350 				UShort4 c0 = As<UShort4>(RoundShort4(c.zyxw));
351 				Byte8 c1 = Pack(c0, c0);
352 				*Pointer<UInt>(element) = UInt(As<Long>(c1));
353 			}
354 			else
355 			{
356 				if(writeB) { *Pointer<Byte>(element + 0) = Byte(RoundInt(Float(c.z))); }
357 				if(writeG) { *Pointer<Byte>(element + 1) = Byte(RoundInt(Float(c.y))); }
358 				if(writeR) { *Pointer<Byte>(element + 2) = Byte(RoundInt(Float(c.x))); }
359 				if(writeA) { *Pointer<Byte>(element + 3) = Byte(RoundInt(Float(c.w))); }
360 			}
361 			break;
362 		case FORMAT_A8B8G8R8:
363 		case FORMAT_SRGB8_A8:
364 			if(writeRGBA)
365 			{
366 				UShort4 c0 = As<UShort4>(RoundShort4(c));
367 				Byte8 c1 = Pack(c0, c0);
368 				*Pointer<UInt>(element) = UInt(As<Long>(c1));
369 			}
370 			else
371 			{
372 				if(writeR) { *Pointer<Byte>(element + 0) = Byte(RoundInt(Float(c.x))); }
373 				if(writeG) { *Pointer<Byte>(element + 1) = Byte(RoundInt(Float(c.y))); }
374 				if(writeB) { *Pointer<Byte>(element + 2) = Byte(RoundInt(Float(c.z))); }
375 				if(writeA) { *Pointer<Byte>(element + 3) = Byte(RoundInt(Float(c.w))); }
376 			}
377 			break;
378 		case FORMAT_X8R8G8B8:
379 			if(writeRGBA)
380 			{
381 				UShort4 c0 = As<UShort4>(RoundShort4(c.zyxw));
382 				Byte8 c1 = Pack(c0, c0);
383 				*Pointer<UInt>(element) = UInt(As<Long>(c1)) | 0xFF000000;
384 			}
385 			else
386 			{
387 				if(writeB) { *Pointer<Byte>(element + 0) = Byte(RoundInt(Float(c.z))); }
388 				if(writeG) { *Pointer<Byte>(element + 1) = Byte(RoundInt(Float(c.y))); }
389 				if(writeR) { *Pointer<Byte>(element + 2) = Byte(RoundInt(Float(c.x))); }
390 				if(writeA) { *Pointer<Byte>(element + 3) = Byte(0xFF); }
391 			}
392 			break;
393 		case FORMAT_X8B8G8R8:
394 		case FORMAT_SRGB8_X8:
395 			if(writeRGBA)
396 			{
397 				UShort4 c0 = As<UShort4>(RoundShort4(c));
398 				Byte8 c1 = Pack(c0, c0);
399 				*Pointer<UInt>(element) = UInt(As<Long>(c1)) | 0xFF000000;
400 			}
401 			else
402 			{
403 				if(writeR) { *Pointer<Byte>(element + 0) = Byte(RoundInt(Float(c.x))); }
404 				if(writeG) { *Pointer<Byte>(element + 1) = Byte(RoundInt(Float(c.y))); }
405 				if(writeB) { *Pointer<Byte>(element + 2) = Byte(RoundInt(Float(c.z))); }
406 				if(writeA) { *Pointer<Byte>(element + 3) = Byte(0xFF); }
407 			}
408 			break;
409 		case FORMAT_R8G8B8:
410 			if(writeR) { *Pointer<Byte>(element + 2) = Byte(RoundInt(Float(c.x))); }
411 			if(writeG) { *Pointer<Byte>(element + 1) = Byte(RoundInt(Float(c.y))); }
412 			if(writeB) { *Pointer<Byte>(element + 0) = Byte(RoundInt(Float(c.z))); }
413 			break;
414 		case FORMAT_B8G8R8:
415 			if(writeR) { *Pointer<Byte>(element + 0) = Byte(RoundInt(Float(c.x))); }
416 			if(writeG) { *Pointer<Byte>(element + 1) = Byte(RoundInt(Float(c.y))); }
417 			if(writeB) { *Pointer<Byte>(element + 2) = Byte(RoundInt(Float(c.z))); }
418 			break;
419 		case FORMAT_A32B32G32R32F:
420 			if(writeRGBA)
421 			{
422 				*Pointer<Float4>(element) = c;
423 			}
424 			else
425 			{
426 				if(writeR) { *Pointer<Float>(element) = c.x; }
427 				if(writeG) { *Pointer<Float>(element + 4) = c.y; }
428 				if(writeB) { *Pointer<Float>(element + 8) = c.z; }
429 				if(writeA) { *Pointer<Float>(element + 12) = c.w; }
430 			}
431 			break;
432 		case FORMAT_X32B32G32R32F:
433 			if(writeA) { *Pointer<Float>(element + 12) = 1.0f; }
434 		case FORMAT_B32G32R32F:
435 			if(writeR) { *Pointer<Float>(element) = c.x; }
436 			if(writeG) { *Pointer<Float>(element + 4) = c.y; }
437 			if(writeB) { *Pointer<Float>(element + 8) = c.z; }
438 			break;
439 		case FORMAT_G32R32F:
440 			if(writeR && writeG)
441 			{
442 				*Pointer<Float2>(element) = Float2(c);
443 			}
444 			else
445 			{
446 				if(writeR) { *Pointer<Float>(element) = c.x; }
447 				if(writeG) { *Pointer<Float>(element + 4) = c.y; }
448 			}
449 			break;
450 		case FORMAT_R32F:
451 			if(writeR) { *Pointer<Float>(element) = c.x; }
452 			break;
453 		case FORMAT_A8B8G8R8I:
454 		case FORMAT_A8B8G8R8I_SNORM:
455 			if(writeA) { *Pointer<SByte>(element + 3) = SByte(RoundInt(Float(c.w))); }
456 		case FORMAT_X8B8G8R8I:
457 		case FORMAT_X8B8G8R8I_SNORM:
458 			if(writeA && (format == FORMAT_X8B8G8R8I || format == FORMAT_X8B8G8R8I_SNORM))
459 			{
460 				*Pointer<SByte>(element + 3) = SByte(0x7F);
461 			}
462 			if(writeB) { *Pointer<SByte>(element + 2) = SByte(RoundInt(Float(c.z))); }
463 		case FORMAT_G8R8I:
464 		case FORMAT_G8R8I_SNORM:
465 			if(writeG) { *Pointer<SByte>(element + 1) = SByte(RoundInt(Float(c.y))); }
466 		case FORMAT_R8I:
467 		case FORMAT_R8I_SNORM:
468 			if(writeR) { *Pointer<SByte>(element) = SByte(RoundInt(Float(c.x))); }
469 			break;
470 		case FORMAT_A8B8G8R8UI:
471 			if(writeA) { *Pointer<Byte>(element + 3) = Byte(RoundInt(Float(c.w))); }
472 		case FORMAT_X8B8G8R8UI:
473 			if(writeA && (format == FORMAT_X8B8G8R8UI))
474 			{
475 				*Pointer<Byte>(element + 3) = Byte(0xFF);
476 			}
477 			if(writeB) { *Pointer<Byte>(element + 2) = Byte(RoundInt(Float(c.z))); }
478 		case FORMAT_G8R8UI:
479 		case FORMAT_G8R8:
480 			if(writeG) { *Pointer<Byte>(element + 1) = Byte(RoundInt(Float(c.y))); }
481 		case FORMAT_R8UI:
482 		case FORMAT_R8:
483 			if(writeR) { *Pointer<Byte>(element) = Byte(RoundInt(Float(c.x))); }
484 			break;
485 		case FORMAT_A16B16G16R16I:
486 			if(writeRGBA)
487 			{
488 				*Pointer<Short4>(element) = Short4(RoundInt(c));
489 			}
490 			else
491 			{
492 				if(writeR) { *Pointer<Short>(element) = Short(RoundInt(Float(c.x))); }
493 				if(writeG) { *Pointer<Short>(element + 2) = Short(RoundInt(Float(c.y))); }
494 				if(writeB) { *Pointer<Short>(element + 4) = Short(RoundInt(Float(c.z))); }
495 				if(writeA) { *Pointer<Short>(element + 6) = Short(RoundInt(Float(c.w))); }
496 			}
497 			break;
498 		case FORMAT_X16B16G16R16I:
499 			if(writeRGBA)
500 			{
501 				*Pointer<Short4>(element) = Short4(RoundInt(c));
502 			}
503 			else
504 			{
505 				if(writeR) { *Pointer<Short>(element) = Short(RoundInt(Float(c.x))); }
506 				if(writeG) { *Pointer<Short>(element + 2) = Short(RoundInt(Float(c.y))); }
507 				if(writeB) { *Pointer<Short>(element + 4) = Short(RoundInt(Float(c.z))); }
508 			}
509 			if(writeA) { *Pointer<Short>(element + 6) = Short(0x7F); }
510 			break;
511 		case FORMAT_G16R16I:
512 			if(writeR && writeG)
513 			{
514 				*Pointer<UInt>(element) = UInt(As<Long>(Short4(RoundInt(c))));
515 			}
516 			else
517 			{
518 				if(writeR) { *Pointer<Short>(element) = Short(RoundInt(Float(c.x))); }
519 				if(writeG) { *Pointer<Short>(element + 2) = Short(RoundInt(Float(c.y))); }
520 			}
521 			break;
522 		case FORMAT_R16I:
523 			if(writeR) { *Pointer<Short>(element) = Short(RoundInt(Float(c.x))); }
524 			break;
525 		case FORMAT_A16B16G16R16UI:
526 		case FORMAT_A16B16G16R16:
527 			if(writeRGBA)
528 			{
529 				*Pointer<UShort4>(element) = UShort4(RoundInt(c));
530 			}
531 			else
532 			{
533 				if(writeR) { *Pointer<UShort>(element) = UShort(RoundInt(Float(c.x))); }
534 				if(writeG) { *Pointer<UShort>(element + 2) = UShort(RoundInt(Float(c.y))); }
535 				if(writeB) { *Pointer<UShort>(element + 4) = UShort(RoundInt(Float(c.z))); }
536 				if(writeA) { *Pointer<UShort>(element + 6) = UShort(RoundInt(Float(c.w))); }
537 			}
538 			break;
539 		case FORMAT_X16B16G16R16UI:
540 			if(writeRGBA)
541 			{
542 				*Pointer<UShort4>(element) = UShort4(RoundInt(c));
543 			}
544 			else
545 			{
546 				if(writeR) { *Pointer<UShort>(element) = UShort(RoundInt(Float(c.x))); }
547 				if(writeG) { *Pointer<UShort>(element + 2) = UShort(RoundInt(Float(c.y))); }
548 				if(writeB) { *Pointer<UShort>(element + 4) = UShort(RoundInt(Float(c.z))); }
549 			}
550 			if(writeA) { *Pointer<UShort>(element + 6) = UShort(0xFF); }
551 			break;
552 		case FORMAT_G16R16UI:
553 		case FORMAT_G16R16:
554 			if(writeR && writeG)
555 			{
556 				*Pointer<UInt>(element) = UInt(As<Long>(UShort4(RoundInt(c))));
557 			}
558 			else
559 			{
560 				if(writeR) { *Pointer<UShort>(element) = UShort(RoundInt(Float(c.x))); }
561 				if(writeG) { *Pointer<UShort>(element + 2) = UShort(RoundInt(Float(c.y))); }
562 			}
563 			break;
564 		case FORMAT_R16UI:
565 			if(writeR) { *Pointer<UShort>(element) = UShort(RoundInt(Float(c.x))); }
566 			break;
567 		case FORMAT_A32B32G32R32I:
568 			if(writeRGBA)
569 			{
570 				*Pointer<Int4>(element) = RoundInt(c);
571 			}
572 			else
573 			{
574 				if(writeR) { *Pointer<Int>(element) = RoundInt(Float(c.x)); }
575 				if(writeG) { *Pointer<Int>(element + 4) = RoundInt(Float(c.y)); }
576 				if(writeB) { *Pointer<Int>(element + 8) = RoundInt(Float(c.z)); }
577 				if(writeA) { *Pointer<Int>(element + 12) = RoundInt(Float(c.w)); }
578 			}
579 			break;
580 		case FORMAT_X32B32G32R32I:
581 			if(writeRGBA)
582 			{
583 				*Pointer<Int4>(element) = RoundInt(c);
584 			}
585 			else
586 			{
587 				if(writeR) { *Pointer<Int>(element) = RoundInt(Float(c.x)); }
588 				if(writeG) { *Pointer<Int>(element + 4) = RoundInt(Float(c.y)); }
589 				if(writeB) { *Pointer<Int>(element + 8) = RoundInt(Float(c.z)); }
590 			}
591 			if(writeA) { *Pointer<Int>(element + 12) = Int(0x7FFFFFFF); }
592 			break;
593 		case FORMAT_G32R32I:
594 			if(writeG) { *Pointer<Int>(element + 4) = RoundInt(Float(c.y)); }
595 		case FORMAT_R32I:
596 			if(writeR) { *Pointer<Int>(element) = RoundInt(Float(c.x)); }
597 			break;
598 		case FORMAT_A32B32G32R32UI:
599 			if(writeRGBA)
600 			{
601 				*Pointer<UInt4>(element) = UInt4(RoundInt(c));
602 			}
603 			else
604 			{
605 				if(writeR) { *Pointer<UInt>(element) = As<UInt>(RoundInt(Float(c.x))); }
606 				if(writeG) { *Pointer<UInt>(element + 4) = As<UInt>(RoundInt(Float(c.y))); }
607 				if(writeB) { *Pointer<UInt>(element + 8) = As<UInt>(RoundInt(Float(c.z))); }
608 				if(writeA) { *Pointer<UInt>(element + 12) = As<UInt>(RoundInt(Float(c.w))); }
609 			}
610 			break;
611 		case FORMAT_X32B32G32R32UI:
612 			if(writeRGBA)
613 			{
614 				*Pointer<UInt4>(element) = UInt4(RoundInt(c));
615 			}
616 			else
617 			{
618 				if(writeR) { *Pointer<UInt>(element) = As<UInt>(RoundInt(Float(c.x))); }
619 				if(writeG) { *Pointer<UInt>(element + 4) = As<UInt>(RoundInt(Float(c.y))); }
620 				if(writeB) { *Pointer<UInt>(element + 8) = As<UInt>(RoundInt(Float(c.z))); }
621 			}
622 			if(writeA) { *Pointer<UInt4>(element + 12) = UInt4(0xFFFFFFFF); }
623 			break;
624 		case FORMAT_G32R32UI:
625 			if(writeG) { *Pointer<UInt>(element + 4) = As<UInt>(RoundInt(Float(c.y))); }
626 		case FORMAT_R32UI:
627 			if(writeR) { *Pointer<UInt>(element) = As<UInt>(RoundInt(Float(c.x))); }
628 			break;
629 		case FORMAT_R5G6B5:
630 			if(writeR && writeG && writeB)
631 			{
632 				*Pointer<UShort>(element) = UShort(RoundInt(Float(c.z)) |
633 				                                  (RoundInt(Float(c.y)) << Int(5)) |
634 				                                  (RoundInt(Float(c.x)) << Int(11)));
635 			}
636 			else
637 			{
638 				unsigned short mask = (writeB ? 0x001F : 0x0000) | (writeG ? 0x07E0 : 0x0000) | (writeR ? 0xF800 : 0x0000);
639 				unsigned short unmask = ~mask;
640 				*Pointer<UShort>(element) = (*Pointer<UShort>(element) & UShort(unmask)) |
641 				                            (UShort(RoundInt(Float(c.z)) |
642 				                                   (RoundInt(Float(c.y)) << Int(5)) |
643 				                                   (RoundInt(Float(c.x)) << Int(11))) & UShort(mask));
644 			}
645 			break;
646 		case FORMAT_A2B10G10R10:
647 			if(writeRGBA)
648 			{
649 				*Pointer<UInt>(element) = UInt(RoundInt(Float(c.x)) |
650 				                              (RoundInt(Float(c.y)) << 10) |
651 				                              (RoundInt(Float(c.z)) << 20) |
652 				                              (RoundInt(Float(c.w)) << 30));
653 			}
654 			else
655 			{
656 				unsigned int mask = (writeA ? 0xC0000000 : 0x0000) |
657 				                    (writeB ? 0x3FF00000 : 0x0000) |
658 				                    (writeG ? 0x000FFC00 : 0x0000) |
659 				                    (writeR ? 0x000003FF : 0x0000);
660 				unsigned int unmask = ~mask;
661 				*Pointer<UInt>(element) = (*Pointer<UInt>(element) & UInt(unmask)) |
662 				                            (UInt(RoundInt(Float(c.x)) |
663 				                                  (RoundInt(Float(c.y)) << 10) |
664 				                                  (RoundInt(Float(c.z)) << 20) |
665 				                                  (RoundInt(Float(c.w)) << 30)) & UInt(mask));
666 			}
667 			break;
668 		case FORMAT_D16:
669 			*Pointer<UShort>(element) = UShort(RoundInt(Float(c.x)));
670 			break;
671 		case FORMAT_D24S8:
672 			*Pointer<UInt>(element) = UInt(RoundInt(Float(c.x)));
673 			break;
674 		case FORMAT_D32:
675 			*Pointer<UInt>(element) = UInt(RoundInt(Float(c.x)));
676 			break;
677 		case FORMAT_D32F:
678 			*Pointer<Float>(element) = c.x;
679 			break;
680 		case FORMAT_D32F_COMPLEMENTARY:
681 			*Pointer<Float>(element) = 1.0f - c.x;
682 			break;
683 		case FORMAT_D32F_LOCKABLE:
684 			*Pointer<Float>(element) = c.x;
685 			break;
686 		case FORMAT_D32FS8_TEXTURE:
687 			*Pointer<Float>(element) = c.x;
688 			break;
689 		case FORMAT_D32FS8_SHADOW:
690 			*Pointer<Float>(element) = c.x;
691 			break;
692 		default:
693 			return false;
694 		}
695 		return true;
696 	}
697 
	// Emits code which loads one texel of an integer 'format' from 'element' into 'c'
	// without any conversion to float.
	//
	// 'c' starts out as (0, 0, 0, 0xFFFFFFFF). For each component width the cases are
	// ordered from four channels down to one and deliberately fall through, so every
	// format loads its highest channel and then all the lower ones. Formats that do not
	// store alpha get the maximum value of their component type in lane 3.
	//
	// Returns false when 'format' is not handled by this function, true otherwise.
	bool Blitter::read(Int4 &c, Pointer<Byte> element, Format format)
	{
		c = Int4(0, 0, 0, 0xFFFFFFFF);

		switch(format)
		{
		// Signed 8-bit components.
		case FORMAT_A8B8G8R8I:
			c = Insert(c, Int(*Pointer<SByte>(element + 3)), 3);
			// fallthrough
		case FORMAT_X8B8G8R8I:
			c = Insert(c, Int(*Pointer<SByte>(element + 2)), 2);
			// fallthrough
		case FORMAT_G8R8I:
			c = Insert(c, Int(*Pointer<SByte>(element + 1)), 1);
			// fallthrough
		case FORMAT_R8I:
			c = Insert(c, Int(*Pointer<SByte>(element)), 0);
			if(format != FORMAT_A8B8G8R8I)
			{
				c = Insert(c, Int(0x7F), 3); // Set alpha
			}
			break;
		// Unsigned 8-bit components.
		case FORMAT_A8B8G8R8UI:
			c = Insert(c, Int(*Pointer<Byte>(element + 3)), 3);
			// fallthrough
		case FORMAT_X8B8G8R8UI:
			c = Insert(c, Int(*Pointer<Byte>(element + 2)), 2);
			// fallthrough
		case FORMAT_G8R8UI:
			c = Insert(c, Int(*Pointer<Byte>(element + 1)), 1);
			// fallthrough
		case FORMAT_R8UI:
			c = Insert(c, Int(*Pointer<Byte>(element)), 0);
			if(format != FORMAT_A8B8G8R8UI)
			{
				c = Insert(c, Int(0xFF), 3); // Set alpha
			}
			break;
		// Signed 16-bit components.
		case FORMAT_A16B16G16R16I:
			c = Insert(c, Int(*Pointer<Short>(element + 6)), 3);
			// fallthrough
		case FORMAT_X16B16G16R16I:
			c = Insert(c, Int(*Pointer<Short>(element + 4)), 2);
			// fallthrough
		case FORMAT_G16R16I:
			c = Insert(c, Int(*Pointer<Short>(element + 2)), 1);
			// fallthrough
		case FORMAT_R16I:
			c = Insert(c, Int(*Pointer<Short>(element)), 0);
			if(format != FORMAT_A16B16G16R16I)
			{
				c = Insert(c, Int(0x7FFF), 3); // Set alpha
			}
			break;
		// Unsigned 16-bit components.
		case FORMAT_A16B16G16R16UI:
			c = Insert(c, Int(*Pointer<UShort>(element + 6)), 3);
			// fallthrough
		case FORMAT_X16B16G16R16UI:
			c = Insert(c, Int(*Pointer<UShort>(element + 4)), 2);
			// fallthrough
		case FORMAT_G16R16UI:
			c = Insert(c, Int(*Pointer<UShort>(element + 2)), 1);
			// fallthrough
		case FORMAT_R16UI:
			c = Insert(c, Int(*Pointer<UShort>(element)), 0);
			if(format != FORMAT_A16B16G16R16UI)
			{
				c = Insert(c, Int(0xFFFF), 3); // Set alpha
			}
			break;
		// Signed 32-bit components; the four-channel format is loaded in one go and
		// is therefore not part of the fallthrough chain.
		case FORMAT_A32B32G32R32I:
			c = *Pointer<Int4>(element);
			break;
		case FORMAT_X32B32G32R32I:
			c = Insert(c, *Pointer<Int>(element + 8), 2);
			// fallthrough
		case FORMAT_G32R32I:
			c = Insert(c, *Pointer<Int>(element + 4), 1);
			// fallthrough
		case FORMAT_R32I:
			c = Insert(c, *Pointer<Int>(element), 0);
			c = Insert(c, Int(0x7FFFFFFF), 3); // Set alpha
			break;
		// Unsigned 32-bit components, structured like the signed ones.
		case FORMAT_A32B32G32R32UI:
			c = *Pointer<UInt4>(element);
			break;
		case FORMAT_X32B32G32R32UI:
			c = Insert(c, Int(*Pointer<UInt>(element + 8)), 2);
			// fallthrough
		case FORMAT_G32R32UI:
			c = Insert(c, Int(*Pointer<UInt>(element + 4)), 1);
			// fallthrough
		case FORMAT_R32UI:
			c = Insert(c, Int(*Pointer<UInt>(element)), 0);
			c = Insert(c, Int(UInt(0xFFFFFFFFU)), 3); // Set alpha
			break;
		default:
			return false;
		}

		return true;
	}
784 
write(Int4 & c,Pointer<Byte> element,Format format,const Blitter::Options & options)785 	bool Blitter::write(Int4 &c, Pointer<Byte> element, Format format, const Blitter::Options& options)
786 	{
787 		bool writeR = (options & WRITE_RED) == WRITE_RED;
788 		bool writeG = (options & WRITE_GREEN) == WRITE_GREEN;
789 		bool writeB = (options & WRITE_BLUE) == WRITE_BLUE;
790 		bool writeA = (options & WRITE_ALPHA) == WRITE_ALPHA;
791 		bool writeRGBA = writeR && writeG && writeB && writeA;
792 
793 		switch(format)
794 		{
795 		case FORMAT_A8B8G8R8I:
796 			if(writeA) { *Pointer<SByte>(element + 3) = SByte(Extract(c, 3)); }
797 		case FORMAT_X8B8G8R8I:
798 			if(writeA && (format != FORMAT_A8B8G8R8I))
799 			{
800 				*Pointer<SByte>(element + 3) = SByte(0x7F);
801 			}
802 			if(writeB) { *Pointer<SByte>(element + 2) = SByte(Extract(c, 2)); }
803 		case FORMAT_G8R8I:
804 			if(writeG) { *Pointer<SByte>(element + 1) = SByte(Extract(c, 1)); }
805 		case FORMAT_R8I:
806 			if(writeR) { *Pointer<SByte>(element) = SByte(Extract(c, 0)); }
807 			break;
808 		case FORMAT_A8B8G8R8UI:
809 			if(writeA) { *Pointer<Byte>(element + 3) = Byte(Extract(c, 3)); }
810 		case FORMAT_X8B8G8R8UI:
811 			if(writeA && (format != FORMAT_A8B8G8R8UI))
812 			{
813 				*Pointer<Byte>(element + 3) = Byte(0xFF);
814 			}
815 			if(writeB) { *Pointer<Byte>(element + 2) = Byte(Extract(c, 2)); }
816 		case FORMAT_G8R8UI:
817 			if(writeG) { *Pointer<Byte>(element + 1) = Byte(Extract(c, 1)); }
818 		case FORMAT_R8UI:
819 			if(writeR) { *Pointer<Byte>(element) = Byte(Extract(c, 0)); }
820 			break;
821 		case FORMAT_A16B16G16R16I:
822 			if(writeA) { *Pointer<Short>(element + 6) = Short(Extract(c, 3)); }
823 		case FORMAT_X16B16G16R16I:
824 			if(writeA && (format != FORMAT_A16B16G16R16I))
825 			{
826 				*Pointer<Short>(element + 6) = Short(0x7FFF);
827 			}
828 			if(writeB) { *Pointer<Short>(element + 4) = Short(Extract(c, 2)); }
829 		case FORMAT_G16R16I:
830 			if(writeG) { *Pointer<Short>(element + 2) = Short(Extract(c, 1)); }
831 		case FORMAT_R16I:
832 			if(writeR) { *Pointer<Short>(element) = Short(Extract(c, 0)); }
833 			break;
834 		case FORMAT_A16B16G16R16UI:
835 			if(writeA) { *Pointer<UShort>(element + 6) = UShort(Extract(c, 3)); }
836 		case FORMAT_X16B16G16R16UI:
837 			if(writeA && (format != FORMAT_A16B16G16R16UI))
838 			{
839 				*Pointer<UShort>(element + 6) = UShort(0xFFFF);
840 			}
841 			if(writeB) { *Pointer<UShort>(element + 4) = UShort(Extract(c, 2)); }
842 		case FORMAT_G16R16UI:
843 			if(writeG) { *Pointer<UShort>(element + 2) = UShort(Extract(c, 1)); }
844 		case FORMAT_R16UI:
845 			if(writeR) { *Pointer<UShort>(element) = UShort(Extract(c, 0)); }
846 			break;
847 		case FORMAT_A32B32G32R32I:
848 			if(writeRGBA)
849 			{
850 				*Pointer<Int4>(element) = c;
851 			}
852 			else
853 			{
854 				if(writeR) { *Pointer<Int>(element) = Extract(c, 0); }
855 				if(writeG) { *Pointer<Int>(element + 4) = Extract(c, 1); }
856 				if(writeB) { *Pointer<Int>(element + 8) = Extract(c, 2); }
857 				if(writeA) { *Pointer<Int>(element + 12) = Extract(c, 3); }
858 			}
859 			break;
860 		case FORMAT_X32B32G32R32I:
861 			if(writeRGBA)
862 			{
863 				*Pointer<Int4>(element) = c;
864 			}
865 			else
866 			{
867 				if(writeR) { *Pointer<Int>(element) = Extract(c, 0); }
868 				if(writeG) { *Pointer<Int>(element + 4) = Extract(c, 1); }
869 				if(writeB) { *Pointer<Int>(element + 8) = Extract(c, 2); }
870 			}
871 			if(writeA) { *Pointer<Int>(element + 12) = Int(0x7FFFFFFF); }
872 			break;
873 		case FORMAT_G32R32I:
874 			if(writeR) { *Pointer<Int>(element) = Extract(c, 0); }
875 			if(writeG) { *Pointer<Int>(element + 4) = Extract(c, 1); }
876 			break;
877 		case FORMAT_R32I:
878 			if(writeR) { *Pointer<Int>(element) = Extract(c, 0); }
879 			break;
880 		case FORMAT_A32B32G32R32UI:
881 			if(writeRGBA)
882 			{
883 				*Pointer<UInt4>(element) = As<UInt4>(c);
884 			}
885 			else
886 			{
887 				if(writeR) { *Pointer<UInt>(element) = As<UInt>(Extract(c, 0)); }
888 				if(writeG) { *Pointer<UInt>(element + 4) = As<UInt>(Extract(c, 1)); }
889 				if(writeB) { *Pointer<UInt>(element + 8) = As<UInt>(Extract(c, 2)); }
890 				if(writeA) { *Pointer<UInt>(element + 12) = As<UInt>(Extract(c, 3)); }
891 			}
892 			break;
893 		case FORMAT_X32B32G32R32UI:
894 			if(writeRGBA)
895 			{
896 				*Pointer<UInt4>(element) = As<UInt4>(c);
897 			}
898 			else
899 			{
900 				if(writeR) { *Pointer<UInt>(element) = As<UInt>(Extract(c, 0)); }
901 				if(writeG) { *Pointer<UInt>(element + 4) = As<UInt>(Extract(c, 1)); }
902 				if(writeB) { *Pointer<UInt>(element + 8) = As<UInt>(Extract(c, 2)); }
903 			}
904 			if(writeA) { *Pointer<UInt>(element + 3) = UInt(0xFFFFFFFF); }
905 			break;
906 		case FORMAT_G32R32UI:
907 			if(writeR) { *Pointer<UInt>(element) = As<UInt>(Extract(c, 0)); }
908 			if(writeG) { *Pointer<UInt>(element + 4) = As<UInt>(Extract(c, 1)); }
909 			break;
910 		case FORMAT_R32UI:
911 			if(writeR) { *Pointer<UInt>(element) = As<UInt>(Extract(c, 0)); }
912 			break;
913 		default:
914 			return false;
915 		}
916 
917 		return true;
918 	}
919 
GetScale(float4 & scale,Format format)920 	bool Blitter::GetScale(float4& scale, Format format)
921 	{
922 		switch(format)
923 		{
924 		case FORMAT_L8:
925 		case FORMAT_A8:
926 		case FORMAT_A8R8G8B8:
927 		case FORMAT_X8R8G8B8:
928 		case FORMAT_R8:
929 		case FORMAT_G8R8:
930 		case FORMAT_R8G8B8:
931 		case FORMAT_B8G8R8:
932 		case FORMAT_X8B8G8R8:
933 		case FORMAT_A8B8G8R8:
934 		case FORMAT_SRGB8_X8:
935 		case FORMAT_SRGB8_A8:
936 			scale = vector(0xFF, 0xFF, 0xFF, 0xFF);
937 			break;
938 		case FORMAT_R8I_SNORM:
939 		case FORMAT_G8R8I_SNORM:
940 		case FORMAT_X8B8G8R8I_SNORM:
941 		case FORMAT_A8B8G8R8I_SNORM:
942 			scale = vector(0x7F, 0x7F, 0x7F, 0x7F);
943 			break;
944 		case FORMAT_A16B16G16R16:
945 			scale = vector(0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF);
946 			break;
947 		case FORMAT_R8I:
948 		case FORMAT_R8UI:
949 		case FORMAT_G8R8I:
950 		case FORMAT_G8R8UI:
951 		case FORMAT_X8B8G8R8I:
952 		case FORMAT_X8B8G8R8UI:
953 		case FORMAT_A8B8G8R8I:
954 		case FORMAT_A8B8G8R8UI:
955 		case FORMAT_R16I:
956 		case FORMAT_R16UI:
957 		case FORMAT_G16R16:
958 		case FORMAT_G16R16I:
959 		case FORMAT_G16R16UI:
960 		case FORMAT_X16B16G16R16I:
961 		case FORMAT_X16B16G16R16UI:
962 		case FORMAT_A16B16G16R16I:
963 		case FORMAT_A16B16G16R16UI:
964 		case FORMAT_R32I:
965 		case FORMAT_R32UI:
966 		case FORMAT_G32R32I:
967 		case FORMAT_G32R32UI:
968 		case FORMAT_X32B32G32R32I:
969 		case FORMAT_X32B32G32R32UI:
970 		case FORMAT_A32B32G32R32I:
971 		case FORMAT_A32B32G32R32UI:
972 		case FORMAT_A32B32G32R32F:
973 		case FORMAT_X32B32G32R32F:
974 		case FORMAT_B32G32R32F:
975 		case FORMAT_G32R32F:
976 		case FORMAT_R32F:
977 			scale = vector(1.0f, 1.0f, 1.0f, 1.0f);
978 			break;
979 		case FORMAT_R5G6B5:
980 			scale = vector(0x1F, 0x3F, 0x1F, 1.0f);
981 			break;
982 		case FORMAT_A2B10G10R10:
983 			scale = vector(0x3FF, 0x3FF, 0x3FF, 0x03);
984 			break;
985 		case FORMAT_D16:
986 			scale = vector(0xFFFF, 0.0f, 0.0f, 0.0f);
987 			break;
988 		case FORMAT_D24S8:
989 			scale = vector(0xFFFFFF, 0.0f, 0.0f, 0.0f);
990 			break;
991 		case FORMAT_D32:
992 			scale = vector(0xFFFFFFFF, 0.0f, 0.0f, 0.0f);
993 			break;
994 		case FORMAT_D32F:
995 		case FORMAT_D32F_COMPLEMENTARY:
996 		case FORMAT_D32F_LOCKABLE:
997 		case FORMAT_D32FS8_TEXTURE:
998 		case FORMAT_D32FS8_SHADOW:
999 			scale = vector(1.0f, 0.0f, 0.0f, 0.0f);
1000 			break;
1001 		default:
1002 			return false;
1003 		}
1004 
1005 		return true;
1006 	}
1007 
ApplyScaleAndClamp(Float4 & value,const BlitState & state)1008 	bool Blitter::ApplyScaleAndClamp(Float4& value, const BlitState& state)
1009 	{
1010 		float4 scale, unscale;
1011 		if(Surface::isNonNormalizedInteger(state.sourceFormat) &&
1012 		   !Surface::isNonNormalizedInteger(state.destFormat) &&
1013 		   (state.options & CLEAR_OPERATION))
1014 		{
1015 			// If we're clearing a buffer from an int or uint color into a normalized color,
1016 			// then the whole range of the int or uint color must be scaled between 0 and 1.
1017 			switch(state.sourceFormat)
1018 			{
1019 			case FORMAT_A32B32G32R32I:
1020 				unscale = vector(0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF);
1021 				break;
1022 			case FORMAT_A32B32G32R32UI:
1023 				unscale = vector(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
1024 				break;
1025 			default:
1026 				return false;
1027 			}
1028 		}
1029 		else if(!GetScale(unscale, state.sourceFormat))
1030 		{
1031 			return false;
1032 		}
1033 
1034 		if(!GetScale(scale, state.destFormat))
1035 		{
1036 			return false;
1037 		}
1038 
1039 		if(unscale != scale)
1040 		{
1041 			value *= Float4(scale.x / unscale.x, scale.y / unscale.y, scale.z / unscale.z, scale.w / unscale.w);
1042 		}
1043 
1044 		if(Surface::isFloatFormat(state.sourceFormat) && !Surface::isFloatFormat(state.destFormat))
1045 		{
1046 			value = Min(value, Float4(scale.x, scale.y, scale.z, scale.w));
1047 
1048 			value = Max(value, Float4(Surface::isUnsignedComponent(state.destFormat, 0) ? 0.0f : -scale.x,
1049 			                          Surface::isUnsignedComponent(state.destFormat, 1) ? 0.0f : -scale.y,
1050 			                          Surface::isUnsignedComponent(state.destFormat, 2) ? 0.0f : -scale.z,
1051 			                          Surface::isUnsignedComponent(state.destFormat, 3) ? 0.0f : -scale.w));
1052 		}
1053 
1054 		return true;
1055 	}
1056 
generate(BlitState & state)1057 	Routine *Blitter::generate(BlitState &state)
1058 	{
1059 		Function<Void(Pointer<Byte>)> function;
1060 		{
1061 			Pointer<Byte> blit(function.Arg<0>());
1062 
1063 			Pointer<Byte> source = *Pointer<Pointer<Byte>>(blit + OFFSET(BlitData,source));
1064 			Pointer<Byte> dest = *Pointer<Pointer<Byte>>(blit + OFFSET(BlitData,dest));
1065 			Int sPitchB = *Pointer<Int>(blit + OFFSET(BlitData,sPitchB));
1066 			Int dPitchB = *Pointer<Int>(blit + OFFSET(BlitData,dPitchB));
1067 
1068 			Float x0 = *Pointer<Float>(blit + OFFSET(BlitData,x0));
1069 			Float y0 = *Pointer<Float>(blit + OFFSET(BlitData,y0));
1070 			Float w = *Pointer<Float>(blit + OFFSET(BlitData,w));
1071 			Float h = *Pointer<Float>(blit + OFFSET(BlitData,h));
1072 
1073 			Int x0d = *Pointer<Int>(blit + OFFSET(BlitData,x0d));
1074 			Int x1d = *Pointer<Int>(blit + OFFSET(BlitData,x1d));
1075 			Int y0d = *Pointer<Int>(blit + OFFSET(BlitData,y0d));
1076 			Int y1d = *Pointer<Int>(blit + OFFSET(BlitData,y1d));
1077 
1078 			Int sWidth = *Pointer<Int>(blit + OFFSET(BlitData,sWidth));
1079 			Int sHeight = *Pointer<Int>(blit + OFFSET(BlitData,sHeight));
1080 
1081 			bool intSrc = Surface::isNonNormalizedInteger(state.sourceFormat);
1082 			bool intDst = Surface::isNonNormalizedInteger(state.destFormat);
1083 			bool intBoth = intSrc && intDst;
1084 
1085 			bool hasConstantColorI = false;
1086 			Int4 constantColorI;
1087 			bool hasConstantColorF = false;
1088 			Float4 constantColorF;
1089 			if(state.options & CLEAR_OPERATION)
1090 			{
1091 				if(intBoth) // Integer types
1092 				{
1093 					if(!read(constantColorI, source, state.sourceFormat))
1094 					{
1095 						return nullptr;
1096 					}
1097 					hasConstantColorI = true;
1098 				}
1099 				else
1100 				{
1101 					if(!read(constantColorF, source, state.sourceFormat))
1102 					{
1103 						return nullptr;
1104 					}
1105 					hasConstantColorF = true;
1106 
1107 					if(!ApplyScaleAndClamp(constantColorF, state))
1108 					{
1109 						return nullptr;
1110 					}
1111 				}
1112 			}
1113 
1114 			Float y = y0;
1115 
1116 			For(Int j = y0d, j < y1d, j++)
1117 			{
1118 				Float x = x0;
1119 				Pointer<Byte> destLine = dest + j * dPitchB;
1120 
1121 				For(Int i = x0d, i < x1d, i++)
1122 				{
1123 					Pointer<Byte> d = destLine + i * Surface::bytes(state.destFormat);
1124 					if(hasConstantColorI)
1125 					{
1126 						if(!write(constantColorI, d, state.destFormat, state.options))
1127 						{
1128 							return nullptr;
1129 						}
1130 					}
1131 					else if(hasConstantColorF)
1132 					{
1133 						if(!write(constantColorF, d, state.destFormat, state.options))
1134 						{
1135 							return nullptr;
1136 						}
1137 					}
1138 					else if(intBoth) // Integer types do not support filtering
1139 					{
1140 						Int4 color; // When both formats are true integer types, we don't go to float to avoid losing precision
1141 						Pointer<Byte> s = source + Int(y) * sPitchB + Int(x) * Surface::bytes(state.sourceFormat);
1142 						if(!read(color, s, state.sourceFormat))
1143 						{
1144 							return nullptr;
1145 						}
1146 
1147 						if(!write(color, d, state.destFormat, state.options))
1148 						{
1149 							return nullptr;
1150 						}
1151 					}
1152 					else
1153 					{
1154 						Float4 color;
1155 
1156 						if(!(state.options & FILTER_LINEAR) || intSrc)
1157 						{
1158 							Int X = Int(x);
1159 							Int Y = Int(y);
1160 
1161 							Pointer<Byte> s = source + Y * sPitchB + X * Surface::bytes(state.sourceFormat);
1162 
1163 							if(!read(color, s, state.sourceFormat))
1164 							{
1165 								return nullptr;
1166 							}
1167 						}
1168 						else   // Bilinear filtering
1169 						{
1170 							Float x0 = x - 0.5f;
1171 							Float y0 = y - 0.5f;
1172 
1173 							Int X0 = Max(Int(x0), 0);
1174 							Int Y0 = Max(Int(y0), 0);
1175 
1176 							Int X1 = IfThenElse(X0 + 1 >= sWidth, X0, X0 + 1);
1177 							Int Y1 = IfThenElse(Y0 + 1 >= sHeight, Y0, Y0 + 1);
1178 
1179 							Pointer<Byte> s00 = source + Y0 * sPitchB + X0 * Surface::bytes(state.sourceFormat);
1180 							Pointer<Byte> s01 = source + Y0 * sPitchB + X1 * Surface::bytes(state.sourceFormat);
1181 							Pointer<Byte> s10 = source + Y1 * sPitchB + X0 * Surface::bytes(state.sourceFormat);
1182 							Pointer<Byte> s11 = source + Y1 * sPitchB + X1 * Surface::bytes(state.sourceFormat);
1183 
1184 							Float4 c00; if(!read(c00, s00, state.sourceFormat)) return nullptr;
1185 							Float4 c01; if(!read(c01, s01, state.sourceFormat)) return nullptr;
1186 							Float4 c10; if(!read(c10, s10, state.sourceFormat)) return nullptr;
1187 							Float4 c11; if(!read(c11, s11, state.sourceFormat)) return nullptr;
1188 
1189 							Float4 fx = Float4(x0 - Float(X0));
1190 							Float4 fy = Float4(y0 - Float(Y0));
1191 
1192 							color = c00 * (Float4(1.0f) - fx) * (Float4(1.0f) - fy) +
1193 							        c01 * fx * (Float4(1.0f) - fy) +
1194 							        c10 * (Float4(1.0f) - fx) * fy +
1195 							        c11 * fx * fy;
1196 						}
1197 
1198 						if(!ApplyScaleAndClamp(color, state) || !write(color, d, state.destFormat, state.options))
1199 						{
1200 							return nullptr;
1201 						}
1202 					}
1203 
1204 					if(!hasConstantColorI && !hasConstantColorF) { x += w; }
1205 				}
1206 
1207 				if(!hasConstantColorI && !hasConstantColorF) { y += h; }
1208 			}
1209 		}
1210 
1211 		return function(L"BlitRoutine");
1212 	}
1213 
blitReactor(Surface * source,const SliceRect & sourceRect,Surface * dest,const SliceRect & destRect,const Blitter::Options & options)1214 	bool Blitter::blitReactor(Surface *source, const SliceRect &sourceRect, Surface *dest, const SliceRect &destRect, const Blitter::Options& options)
1215 	{
1216 		ASSERT(!(options & CLEAR_OPERATION) || ((source->getWidth() == 1) && (source->getHeight() == 1) && (source->getDepth() == 1)));
1217 
1218 		Rect dRect = destRect;
1219 		Rect sRect = sourceRect;
1220 		if(destRect.x0 > destRect.x1)
1221 		{
1222 			swap(dRect.x0, dRect.x1);
1223 			swap(sRect.x0, sRect.x1);
1224 		}
1225 		if(destRect.y0 > destRect.y1)
1226 		{
1227 			swap(dRect.y0, dRect.y1);
1228 			swap(sRect.y0, sRect.y1);
1229 		}
1230 
1231 		BlitState state;
1232 
1233 		bool useSourceInternal = !source->isExternalDirty();
1234 		bool useDestInternal = !dest->isExternalDirty();
1235 
1236 		state.sourceFormat = source->getFormat(useSourceInternal);
1237 		state.destFormat = dest->getFormat(useDestInternal);
1238 		state.options = options;
1239 
1240 		criticalSection.lock();
1241 		Routine *blitRoutine = blitCache->query(state);
1242 
1243 		if(!blitRoutine)
1244 		{
1245 			blitRoutine = generate(state);
1246 
1247 			if(!blitRoutine)
1248 			{
1249 				criticalSection.unlock();
1250 				return false;
1251 			}
1252 
1253 			blitCache->add(state, blitRoutine);
1254 		}
1255 
1256 		criticalSection.unlock();
1257 
1258 		void (*blitFunction)(const BlitData *data) = (void(*)(const BlitData*))blitRoutine->getEntry();
1259 
1260 		BlitData data;
1261 
1262 		bool isRGBA = ((options & WRITE_RGBA) == WRITE_RGBA);
1263 		bool isEntireDest = dest->isEntire(destRect);
1264 
1265 		data.source = source->lock(0, 0, sourceRect.slice, sw::LOCK_READONLY, sw::PUBLIC, useSourceInternal);
1266 		data.dest = dest->lock(0, 0, destRect.slice, isRGBA ? (isEntireDest ? sw::LOCK_DISCARD : sw::LOCK_WRITEONLY) : sw::LOCK_READWRITE, sw::PUBLIC, useDestInternal);
1267 		data.sPitchB = source->getPitchB(useSourceInternal);
1268 		data.dPitchB = dest->getPitchB(useDestInternal);
1269 
1270 		data.w = 1.0f / (dRect.x1 - dRect.x0) * (sRect.x1 - sRect.x0);
1271 		data.h = 1.0f / (dRect.y1 - dRect.y0) * (sRect.y1 - sRect.y0);
1272 		data.x0 = (float)sRect.x0 + 0.5f * data.w;
1273 		data.y0 = (float)sRect.y0 + 0.5f * data.h;
1274 
1275 		data.x0d = dRect.x0;
1276 		data.x1d = dRect.x1;
1277 		data.y0d = dRect.y0;
1278 		data.y1d = dRect.y1;
1279 
1280 		data.sWidth = source->getWidth();
1281 		data.sHeight = source->getHeight();
1282 
1283 		blitFunction(&data);
1284 
1285 		source->unlock(useSourceInternal);
1286 		dest->unlock(useDestInternal);
1287 
1288 		return true;
1289 	}
1290 }
1291