1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //    http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "Blitter.hpp"
16 
17 #include "Pipeline/ShaderCore.hpp"
18 #include "Reactor/Reactor.hpp"
19 #include "System/Memory.hpp"
20 #include "Vulkan/VkDebug.hpp"
21 
22 namespace sw
23 {
Blitter()24 	Blitter::Blitter()
25 	{
26 		blitCache = new RoutineCache<State>(1024);
27 	}
28 
~Blitter()29 	Blitter::~Blitter()
30 	{
31 		delete blitCache;
32 	}
33 
clear(void * pixel,VkFormat format,Surface * dest,const SliceRect & dRect,unsigned int rgbaMask)34 	void Blitter::clear(void *pixel, VkFormat format, Surface *dest, const SliceRect &dRect, unsigned int rgbaMask)
35 	{
36 		if(fastClear(pixel, format, dest, dRect, rgbaMask))
37 		{
38 			return;
39 		}
40 
41 		sw::Surface *color = sw::Surface::create(1, 1, 1, format, pixel, sw::Surface::bytes(format), sw::Surface::bytes(format));
42 		SliceRectF sRect(0.5f, 0.5f, 0.5f, 0.5f, 0);   // Sample from the middle.
43 		blit(color, sRect, dest, dRect, {rgbaMask});
44 		delete color;
45 	}
46 
fastClear(void * pixel,VkFormat format,Surface * dest,const SliceRect & dRect,unsigned int rgbaMask)47 	bool Blitter::fastClear(void *pixel, VkFormat format, Surface *dest, const SliceRect &dRect, unsigned int rgbaMask)
48 	{
49 		if(format != VK_FORMAT_R32G32B32A32_SFLOAT)
50 		{
51 			return false;
52 		}
53 
54 		float *color = (float*)pixel;
55 		float r = color[0];
56 		float g = color[1];
57 		float b = color[2];
58 		float a = color[3];
59 
60 		uint32_t packed;
61 
62 		switch(dest->getFormat())
63 		{
64 		case VK_FORMAT_R5G6B5_UNORM_PACK16:
65 			if((rgbaMask & 0x7) != 0x7) return false;
66 			packed = ((uint16_t)(31 * b + 0.5f) << 0) |
67 			         ((uint16_t)(63 * g + 0.5f) << 5) |
68 			         ((uint16_t)(31 * r + 0.5f) << 11);
69 			break;
70 		case VK_FORMAT_B5G6R5_UNORM_PACK16:
71 			if((rgbaMask & 0x7) != 0x7) return false;
72 			packed = ((uint16_t)(31 * r + 0.5f) << 0) |
73 			         ((uint16_t)(63 * g + 0.5f) << 5) |
74 			         ((uint16_t)(31 * b + 0.5f) << 11);
75 			break;
76 		case VK_FORMAT_A8B8G8R8_UINT_PACK32:
77 		case VK_FORMAT_A8B8G8R8_UNORM_PACK32:
78 		case VK_FORMAT_R8G8B8A8_UNORM:
79 			if((rgbaMask & 0xF) != 0xF) return false;
80 			packed = ((uint32_t)(255 * a + 0.5f) << 24) |
81 			         ((uint32_t)(255 * b + 0.5f) << 16) |
82 			         ((uint32_t)(255 * g + 0.5f) << 8) |
83 			         ((uint32_t)(255 * r + 0.5f) << 0);
84 			break;
85 		case VK_FORMAT_B8G8R8A8_UNORM:
86 			if((rgbaMask & 0xF) != 0xF) return false;
87 			packed = ((uint32_t)(255 * a + 0.5f) << 24) |
88 			         ((uint32_t)(255 * r + 0.5f) << 16) |
89 			         ((uint32_t)(255 * g + 0.5f) << 8) |
90 			         ((uint32_t)(255 * b + 0.5f) << 0);
91 			break;
92 		case VK_FORMAT_B10G11R11_UFLOAT_PACK32:
93 			if((rgbaMask & 0x7) != 0x7) return false;
94 			packed = R11G11B10F(color);
95 			break;
96 		case VK_FORMAT_E5B9G9R9_UFLOAT_PACK32:
97 			if((rgbaMask & 0x7) != 0x7) return false;
98 			packed = RGB9E5(color);
99 			break;
100 		default:
101 			return false;
102 		}
103 
104 		bool useDestInternal = !dest->isExternalDirty();
105 		uint8_t *slice = (uint8_t*)dest->lock(dRect.x0, dRect.y0, dRect.slice, sw::LOCK_WRITEONLY, sw::PUBLIC, useDestInternal);
106 
107 		for(int j = 0; j < dest->getSamples(); j++)
108 		{
109 			uint8_t *d = slice;
110 
111 			switch(Surface::bytes(dest->getFormat()))
112 			{
113 			case 2:
114 				for(int i = dRect.y0; i < dRect.y1; i++)
115 				{
116 					sw::clear((uint16_t*)d, packed, dRect.x1 - dRect.x0);
117 					d += dest->getPitchB(useDestInternal);
118 				}
119 				break;
120 			case 4:
121 				for(int i = dRect.y0; i < dRect.y1; i++)
122 				{
123 					sw::clear((uint32_t*)d, packed, dRect.x1 - dRect.x0);
124 					d += dest->getPitchB(useDestInternal);
125 				}
126 				break;
127 			default:
128 				assert(false);
129 			}
130 
131 			slice += dest->getSliceB(useDestInternal);
132 		}
133 
134 		dest->unlock(useDestInternal);
135 
136 		return true;
137 	}
138 
blit(Surface * source,const SliceRectF & sourceRect,Surface * dest,const SliceRect & destRect,const Blitter::Options & options)139 	void Blitter::blit(Surface *source, const SliceRectF &sourceRect, Surface *dest, const SliceRect &destRect, const Blitter::Options& options)
140 	{
141 		if(dest->getInternalFormat() == VK_FORMAT_UNDEFINED)
142 		{
143 			return;
144 		}
145 
146 		if(blitReactor(source, sourceRect, dest, destRect, options))
147 		{
148 			return;
149 		}
150 
151 		SliceRectF sRect = sourceRect;
152 		SliceRect dRect = destRect;
153 
154 		bool flipX = destRect.x0 > destRect.x1;
155 		bool flipY = destRect.y0 > destRect.y1;
156 
157 		if(flipX)
158 		{
159 			swap(dRect.x0, dRect.x1);
160 			swap(sRect.x0, sRect.x1);
161 		}
162 		if(flipY)
163 		{
164 			swap(dRect.y0, dRect.y1);
165 			swap(sRect.y0, sRect.y1);
166 		}
167 
168 		source->lockInternal(0, 0, sRect.slice, sw::LOCK_READONLY, sw::PUBLIC);
169 		dest->lockInternal(0, 0, dRect.slice, sw::LOCK_WRITEONLY, sw::PUBLIC);
170 
171 		float w = sRect.width() / dRect.width();
172 		float h = sRect.height() / dRect.height();
173 
174 		float xStart = sRect.x0 + (0.5f - dRect.x0) * w;
175 		float yStart = sRect.y0 + (0.5f - dRect.y0) * h;
176 
177 		for(int j = dRect.y0; j < dRect.y1; j++)
178 		{
179 			float y = yStart + j * h;
180 
181 			for(int i = dRect.x0; i < dRect.x1; i++)
182 			{
183 				float x = xStart + i * w;
184 
185 				// FIXME: Support RGBA mask
186 				dest->copyInternal(source, i, j, x, y, options.filter);
187 			}
188 		}
189 
190 		source->unlockInternal();
191 		dest->unlockInternal();
192 	}
193 
blit3D(Surface * source,Surface * dest)194 	void Blitter::blit3D(Surface *source, Surface *dest)
195 	{
196 		source->lockInternal(0, 0, 0, sw::LOCK_READONLY, sw::PUBLIC);
197 		dest->lockInternal(0, 0, 0, sw::LOCK_WRITEONLY, sw::PUBLIC);
198 
199 		float w = static_cast<float>(source->getWidth())  / static_cast<float>(dest->getWidth());
200 		float h = static_cast<float>(source->getHeight()) / static_cast<float>(dest->getHeight());
201 		float d = static_cast<float>(source->getDepth())  / static_cast<float>(dest->getDepth());
202 
203 		for(int k = 0; k < dest->getDepth(); k++)
204 		{
205 			float z = (k + 0.5f) * d;
206 
207 			for(int j = 0; j < dest->getHeight(); j++)
208 			{
209 				float y = (j + 0.5f) * h;
210 
211 				for(int i = 0; i < dest->getWidth(); i++)
212 				{
213 					float x = (i + 0.5f) * w;
214 
215 					dest->copyInternal(source, i, j, k, x, y, z, true);
216 				}
217 			}
218 		}
219 
220 		source->unlockInternal();
221 		dest->unlockInternal();
222 	}
223 
read(Float4 & c,Pointer<Byte> element,const State & state)224 	bool Blitter::read(Float4 &c, Pointer<Byte> element, const State &state)
225 	{
226 		c = Float4(0.0f, 0.0f, 0.0f, 1.0f);
227 
228 		switch(state.sourceFormat)
229 		{
230 		case VK_FORMAT_B4G4R4A4_UNORM_PACK16:
231 			c.w = Float(Int(*Pointer<Byte>(element)) & Int(0xF));
232 			c.x = Float((Int(*Pointer<Byte>(element)) >> 4) & Int(0xF));
233 			c.y = Float(Int(*Pointer<Byte>(element + 1)) & Int(0xF));
234 			c.z = Float((Int(*Pointer<Byte>(element + 1)) >> 4) & Int(0xF));
235 			break;
236 		case VK_FORMAT_R8_SINT:
237 		case VK_FORMAT_R8_SNORM:
238 			c.x = Float(Int(*Pointer<SByte>(element)));
239 			c.w = float(0x7F);
240 			break;
241 		case VK_FORMAT_R8_UNORM:
242 		case VK_FORMAT_R8_UINT:
243 			c.x = Float(Int(*Pointer<Byte>(element)));
244 			c.w = float(0xFF);
245 			break;
246 		case VK_FORMAT_R16_SINT:
247 			c.x = Float(Int(*Pointer<Short>(element)));
248 			c.w = float(0x7FFF);
249 			break;
250 		case VK_FORMAT_R16_UINT:
251 			c.x = Float(Int(*Pointer<UShort>(element)));
252 			c.w = float(0xFFFF);
253 			break;
254 		case VK_FORMAT_R32_SINT:
255 			c.x = Float(*Pointer<Int>(element));
256 			c.w = float(0x7FFFFFFF);
257 			break;
258 		case VK_FORMAT_R32_UINT:
259 			c.x = Float(*Pointer<UInt>(element));
260 			c.w = float(0xFFFFFFFF);
261 			break;
262 		case VK_FORMAT_B8G8R8A8_SRGB:
263 		case VK_FORMAT_B8G8R8A8_UNORM:
264 			c = Float4(*Pointer<Byte4>(element)).zyxw;
265 			break;
266 		case VK_FORMAT_A8B8G8R8_SINT_PACK32:
267 		case VK_FORMAT_R8G8B8A8_SINT:
268 		case VK_FORMAT_A8B8G8R8_SNORM_PACK32:
269 		case VK_FORMAT_R8G8B8A8_SNORM:
270 			c = Float4(*Pointer<SByte4>(element));
271 			break;
272 		case VK_FORMAT_A8B8G8R8_UINT_PACK32:
273 		case VK_FORMAT_A8B8G8R8_UNORM_PACK32:
274 		case VK_FORMAT_R8G8B8A8_UNORM:
275 		case VK_FORMAT_R8G8B8A8_UINT:
276 		case VK_FORMAT_A8B8G8R8_SRGB_PACK32:
277 		case VK_FORMAT_R8G8B8A8_SRGB:
278 			c = Float4(*Pointer<Byte4>(element));
279 			break;
280 		case VK_FORMAT_R16G16B16A16_SINT:
281 			c = Float4(*Pointer<Short4>(element));
282 			break;
283 		case VK_FORMAT_R16G16B16A16_UNORM:
284 		case VK_FORMAT_R16G16B16A16_UINT:
285 			c = Float4(*Pointer<UShort4>(element));
286 			break;
287 		case VK_FORMAT_R32G32B32A32_SINT:
288 			c = Float4(*Pointer<Int4>(element));
289 			break;
290 		case VK_FORMAT_R32G32B32A32_UINT:
291 			c = Float4(*Pointer<UInt4>(element));
292 			break;
293 		case VK_FORMAT_R8G8_SINT:
294 		case VK_FORMAT_R8G8_SNORM:
295 			c.x = Float(Int(*Pointer<SByte>(element + 0)));
296 			c.y = Float(Int(*Pointer<SByte>(element + 1)));
297 			c.w = float(0x7F);
298 			break;
299 		case VK_FORMAT_R8G8_UNORM:
300 		case VK_FORMAT_R8G8_UINT:
301 			c.x = Float(Int(*Pointer<Byte>(element + 0)));
302 			c.y = Float(Int(*Pointer<Byte>(element + 1)));
303 			c.w = float(0xFF);
304 			break;
305 		case VK_FORMAT_R16G16_SINT:
306 			c.x = Float(Int(*Pointer<Short>(element + 0)));
307 			c.y = Float(Int(*Pointer<Short>(element + 2)));
308 			c.w = float(0x7FFF);
309 			break;
310 		case VK_FORMAT_R16G16_UNORM:
311 		case VK_FORMAT_R16G16_UINT:
312 			c.x = Float(Int(*Pointer<UShort>(element + 0)));
313 			c.y = Float(Int(*Pointer<UShort>(element + 2)));
314 			c.w = float(0xFFFF);
315 			break;
316 		case VK_FORMAT_R32G32_SINT:
317 			c.x = Float(*Pointer<Int>(element + 0));
318 			c.y = Float(*Pointer<Int>(element + 4));
319 			c.w = float(0x7FFFFFFF);
320 			break;
321 		case VK_FORMAT_R32G32_UINT:
322 			c.x = Float(*Pointer<UInt>(element + 0));
323 			c.y = Float(*Pointer<UInt>(element + 4));
324 			c.w = float(0xFFFFFFFF);
325 			break;
326 		case VK_FORMAT_R32G32B32A32_SFLOAT:
327 			c = *Pointer<Float4>(element);
328 			break;
329 		case VK_FORMAT_R32G32_SFLOAT:
330 			c.x = *Pointer<Float>(element + 0);
331 			c.y = *Pointer<Float>(element + 4);
332 			break;
333 		case VK_FORMAT_R32_SFLOAT:
334 			c.x = *Pointer<Float>(element);
335 			break;
336 		case VK_FORMAT_R16G16B16A16_SFLOAT:
337 			c.w = Float(*Pointer<Half>(element + 6));
338 		case VK_FORMAT_R16G16B16_SFLOAT:
339 			c.z = Float(*Pointer<Half>(element + 4));
340 		case VK_FORMAT_R16G16_SFLOAT:
341 			c.y = Float(*Pointer<Half>(element + 2));
342 		case VK_FORMAT_R16_SFLOAT:
343 			c.x = Float(*Pointer<Half>(element));
344 			break;
345 		case VK_FORMAT_B10G11R11_UFLOAT_PACK32:
346 			// 10 (or 11) bit float formats are unsigned formats with a 5 bit exponent and a 5 (or 6) bit mantissa.
347 			// Since the Half float format also has a 5 bit exponent, we can convert these formats to half by
348 			// copy/pasting the bits so the the exponent bits and top mantissa bits are aligned to the half format.
349 			// In this case, we have:
350 			//              B B B B B B B B B B G G G G G G G G G G G R R R R R R R R R R R
351 			// 1st Short:                                  |xxxxxxxxxx---------------------|
352 			// 2nd Short:                  |xxxx---------------------xxxxxx|
353 			// 3rd Short: |--------------------xxxxxxxxxxxx|
354 			// These memory reads overlap, but each of them contains an entire channel, so we can read this without
355 			// any int -> short conversion.
356 			c.x = Float(As<Half>((*Pointer<UShort>(element + 0) & UShort(0x07FF)) << UShort(4)));
357 			c.y = Float(As<Half>((*Pointer<UShort>(element + 1) & UShort(0x3FF8)) << UShort(1)));
358 			c.z = Float(As<Half>((*Pointer<UShort>(element + 2) & UShort(0xFFC0)) >> UShort(1)));
359 			break;
360 		case VK_FORMAT_E5B9G9R9_UFLOAT_PACK32:
361 			// This type contains a common 5 bit exponent (E) and a 9 bit the mantissa for R, G and B.
362 			c.x = Float(*Pointer<UInt>(element) & UInt(0x000001FF));         // R's mantissa (bits 0-8)
363 			c.y = Float((*Pointer<UInt>(element) & UInt(0x0003FE00)) >> 9);  // G's mantissa (bits 9-17)
364 			c.z = Float((*Pointer<UInt>(element) & UInt(0x07FC0000)) >> 18); // B's mantissa (bits 18-26)
365 			c *= Float4(
366 				// 2^E, using the exponent (bits 27-31) and treating it as an unsigned integer value
367 				Float(UInt(1) << ((*Pointer<UInt>(element) & UInt(0xF8000000)) >> 27)) *
368 				// Since the 9 bit mantissa values currently stored in RGB were converted straight
369 				// from int to float (in the [0, 1<<9] range instead of the [0, 1] range), they
370 				// are (1 << 9) times too high.
371 				// Also, the exponent has 5 bits and we compute the exponent bias of floating point
372 				// formats using "2^(k-1) - 1", so, in this case, the exponent bias is 2^(5-1)-1 = 15
373 				// Exponent bias (15) + number of mantissa bits per component (9) = 24
374 				Float(1.0f / (1 << 24)));
375 			c.w = 1.0f;
376 			break;
377 		case VK_FORMAT_R5G6B5_UNORM_PACK16:
378 			c.x = Float(Int((*Pointer<UShort>(element) & UShort(0xF800)) >> UShort(11)));
379 			c.y = Float(Int((*Pointer<UShort>(element) & UShort(0x07E0)) >> UShort(5)));
380 			c.z = Float(Int(*Pointer<UShort>(element) & UShort(0x001F)));
381 			break;
382 		case VK_FORMAT_A1R5G5B5_UNORM_PACK16:
383 			c.w = Float(Int((*Pointer<UShort>(element) & UShort(0x8000)) >> UShort(15)));
384 			c.x = Float(Int((*Pointer<UShort>(element) & UShort(0x7C00)) >> UShort(10)));
385 			c.y = Float(Int((*Pointer<UShort>(element) & UShort(0x03E0)) >> UShort(5)));
386 			c.z = Float(Int(*Pointer<UShort>(element) & UShort(0x001F)));
387 			break;
388 		case VK_FORMAT_A2B10G10R10_UNORM_PACK32:
389 		case VK_FORMAT_A2B10G10R10_UINT_PACK32:
390 			c.x = Float(Int((*Pointer<UInt>(element) & UInt(0x000003FF))));
391 			c.y = Float(Int((*Pointer<UInt>(element) & UInt(0x000FFC00)) >> 10));
392 			c.z = Float(Int((*Pointer<UInt>(element) & UInt(0x3FF00000)) >> 20));
393 			c.w = Float(Int((*Pointer<UInt>(element) & UInt(0xC0000000)) >> 30));
394 			break;
395 		case VK_FORMAT_D16_UNORM:
396 			c.x = Float(Int((*Pointer<UShort>(element))));
397 			break;
398 		case VK_FORMAT_D24_UNORM_S8_UINT:
399 		case VK_FORMAT_X8_D24_UNORM_PACK32:
400 			c.x = Float(Int((*Pointer<UInt>(element) & UInt(0xFFFFFF00)) >> 8));
401 			break;
402 		case VK_FORMAT_D32_SFLOAT:
403 		case VK_FORMAT_D32_SFLOAT_S8_UINT:
404 			c.x = *Pointer<Float>(element);
405 			break;
406 		case VK_FORMAT_S8_UINT:
407 			c.x = Float(Int(*Pointer<Byte>(element)));
408 			break;
409 		default:
410 			return false;
411 		}
412 
413 		return true;
414 	}
415 
write(Float4 & c,Pointer<Byte> element,const State & state)416 	bool Blitter::write(Float4 &c, Pointer<Byte> element, const State &state)
417 	{
418 		bool writeR = state.writeRed;
419 		bool writeG = state.writeGreen;
420 		bool writeB = state.writeBlue;
421 		bool writeA = state.writeAlpha;
422 		bool writeRGBA = writeR && writeG && writeB && writeA;
423 
424 		switch(state.destFormat)
425 		{
426 		case VK_FORMAT_R4G4_UNORM_PACK8:
427 			if(writeR | writeG)
428 			{
429 				if(!writeR)
430 				{
431 					*Pointer<Byte>(element) = (Byte(RoundInt(Float(c.y))) & Byte(0xF)) |
432 				                              (*Pointer<Byte>(element) & Byte(0xF0));
433 				}
434 				else if(!writeG)
435 				{
436 					*Pointer<Byte>(element) = (*Pointer<Byte>(element) & Byte(0xF)) |
437 				                              (Byte(RoundInt(Float(c.x))) << Byte(4));
438 				}
439 				else
440 				{
441 					*Pointer<Byte>(element) = (Byte(RoundInt(Float(c.y))) & Byte(0xF)) |
442 				                              (Byte(RoundInt(Float(c.x))) << Byte(4));
443 				}
444 			}
445 			break;
446 		case VK_FORMAT_R4G4B4A4_UNORM_PACK16:
447 			if(writeR || writeG || writeB || writeA)
448 			{
449 				*Pointer<UShort>(element) = (writeR ? ((UShort(RoundInt(Float(c.x))) & UShort(0xF)) << UShort(12)) :
450 				                                      (*Pointer<UShort>(element) & UShort(0x000F))) |
451 				                            (writeG ? ((UShort(RoundInt(Float(c.y))) & UShort(0xF)) << UShort(8)) :
452 				                                      (*Pointer<UShort>(element) & UShort(0x00F0))) |
453 				                            (writeB ? ((UShort(RoundInt(Float(c.z))) & UShort(0xF)) << UShort(4)) :
454 			                                          (*Pointer<UShort>(element) & UShort(0x0F00))) |
455 			                                (writeA ? (UShort(RoundInt(Float(c.w))) & UShort(0xF)) :
456 			                                          (*Pointer<UShort>(element) & UShort(0xF000)));
457 			}
458 			break;
459 		case VK_FORMAT_B4G4R4A4_UNORM_PACK16:
460 			if(writeRGBA)
461 			{
462 				*Pointer<UShort>(element) = UShort(RoundInt(Float(c.w)) & Int(0xF)) |
463 				                            UShort((RoundInt(Float(c.x)) & Int(0xF)) << 4) |
464 				                            UShort((RoundInt(Float(c.y)) & Int(0xF)) << 8) |
465 				                            UShort((RoundInt(Float(c.z)) & Int(0xF)) << 12);
466 			}
467 			else
468 			{
469 				unsigned short mask = (writeA ? 0x000F : 0x0000) |
470 				                      (writeR ? 0x00F0 : 0x0000) |
471 				                      (writeG ? 0x0F00 : 0x0000) |
472 				                      (writeB ? 0xF000 : 0x0000);
473 				unsigned short unmask = ~mask;
474 				*Pointer<UShort>(element) = (*Pointer<UShort>(element) & UShort(unmask)) |
475 				                            ((UShort(RoundInt(Float(c.w)) & Int(0xF)) |
476 				                              UShort((RoundInt(Float(c.x)) & Int(0xF)) << 4) |
477 				                              UShort((RoundInt(Float(c.y)) & Int(0xF)) << 8) |
478 				                              UShort((RoundInt(Float(c.z)) & Int(0xF)) << 12)) & UShort(mask));
479 			}
480 			break;
481 		case VK_FORMAT_B8G8R8A8_SRGB:
482 		case VK_FORMAT_B8G8R8A8_UNORM:
483 			if(writeRGBA)
484 			{
485 				Short4 c0 = RoundShort4(c.zyxw);
486 				*Pointer<Byte4>(element) = Byte4(PackUnsigned(c0, c0));
487 			}
488 			else
489 			{
490 				if(writeB) { *Pointer<Byte>(element + 0) = Byte(RoundInt(Float(c.z))); }
491 				if(writeG) { *Pointer<Byte>(element + 1) = Byte(RoundInt(Float(c.y))); }
492 				if(writeR) { *Pointer<Byte>(element + 2) = Byte(RoundInt(Float(c.x))); }
493 				if(writeA) { *Pointer<Byte>(element + 3) = Byte(RoundInt(Float(c.w))); }
494 			}
495 			break;
496 		case VK_FORMAT_B8G8R8_SNORM:
497 			if(writeB) { *Pointer<SByte>(element + 0) = SByte(RoundInt(Float(c.z))); }
498 			if(writeG) { *Pointer<SByte>(element + 1) = SByte(RoundInt(Float(c.y))); }
499 			if(writeR) { *Pointer<SByte>(element + 2) = SByte(RoundInt(Float(c.x))); }
500 			break;
501 		case VK_FORMAT_B8G8R8_UNORM:
502 		case VK_FORMAT_B8G8R8_SRGB:
503 			if(writeB) { *Pointer<Byte>(element + 0) = Byte(RoundInt(Float(c.z))); }
504 			if(writeG) { *Pointer<Byte>(element + 1) = Byte(RoundInt(Float(c.y))); }
505 			if(writeR) { *Pointer<Byte>(element + 2) = Byte(RoundInt(Float(c.x))); }
506 			break;
507 		case VK_FORMAT_A8B8G8R8_UNORM_PACK32:
508 		case VK_FORMAT_R8G8B8A8_UNORM:
509 		case VK_FORMAT_A8B8G8R8_SRGB_PACK32:
510 		case VK_FORMAT_R8G8B8A8_SRGB:
511 		case VK_FORMAT_A8B8G8R8_UINT_PACK32:
512 		case VK_FORMAT_R8G8B8A8_UINT:
513 		case VK_FORMAT_R8G8B8A8_USCALED:
514 		case VK_FORMAT_A8B8G8R8_USCALED_PACK32:
515 			if(writeRGBA)
516 			{
517 				Short4 c0 = RoundShort4(c);
518 				*Pointer<Byte4>(element) = Byte4(PackUnsigned(c0, c0));
519 			}
520 			else
521 			{
522 				if(writeR) { *Pointer<Byte>(element + 0) = Byte(RoundInt(Float(c.x))); }
523 				if(writeG) { *Pointer<Byte>(element + 1) = Byte(RoundInt(Float(c.y))); }
524 				if(writeB) { *Pointer<Byte>(element + 2) = Byte(RoundInt(Float(c.z))); }
525 				if(writeA) { *Pointer<Byte>(element + 3) = Byte(RoundInt(Float(c.w))); }
526 			}
527 			break;
528 		case VK_FORMAT_R32G32B32A32_SFLOAT:
529 			if(writeRGBA)
530 			{
531 				*Pointer<Float4>(element) = c;
532 			}
533 			else
534 			{
535 				if(writeR) { *Pointer<Float>(element) = c.x; }
536 				if(writeG) { *Pointer<Float>(element + 4) = c.y; }
537 				if(writeB) { *Pointer<Float>(element + 8) = c.z; }
538 				if(writeA) { *Pointer<Float>(element + 12) = c.w; }
539 			}
540 			break;
541 		case VK_FORMAT_R32G32B32_SFLOAT:
542 			if(writeR) { *Pointer<Float>(element) = c.x; }
543 			if(writeG) { *Pointer<Float>(element + 4) = c.y; }
544 			if(writeB) { *Pointer<Float>(element + 8) = c.z; }
545 			break;
546 		case VK_FORMAT_R32G32_SFLOAT:
547 			if(writeR && writeG)
548 			{
549 				*Pointer<Float2>(element) = Float2(c);
550 			}
551 			else
552 			{
553 				if(writeR) { *Pointer<Float>(element) = c.x; }
554 				if(writeG) { *Pointer<Float>(element + 4) = c.y; }
555 			}
556 			break;
557 		case VK_FORMAT_R32_SFLOAT:
558 			if(writeR) { *Pointer<Float>(element) = c.x; }
559 			break;
560 		case VK_FORMAT_R16G16B16A16_SFLOAT:
561 			if(writeA) { *Pointer<Half>(element + 6) = Half(c.w); }
562 		case VK_FORMAT_R16G16B16_SFLOAT:
563 			if(writeB) { *Pointer<Half>(element + 4) = Half(c.z); }
564 		case VK_FORMAT_R16G16_SFLOAT:
565 			if(writeG) { *Pointer<Half>(element + 2) = Half(c.y); }
566 		case VK_FORMAT_R16_SFLOAT:
567 			if(writeR) { *Pointer<Half>(element) = Half(c.x); }
568 			break;
569 		case VK_FORMAT_B8G8R8A8_SNORM:
570 			if(writeB) { *Pointer<SByte>(element) = SByte(RoundInt(Float(c.z))); }
571 			if(writeG) { *Pointer<SByte>(element + 1) = SByte(RoundInt(Float(c.y))); }
572 			if(writeR) { *Pointer<SByte>(element + 2) = SByte(RoundInt(Float(c.x))); }
573 			if(writeA) { *Pointer<SByte>(element + 3) = SByte(RoundInt(Float(c.w))); }
574 			break;
575 		case VK_FORMAT_A8B8G8R8_SINT_PACK32:
576 		case VK_FORMAT_R8G8B8A8_SINT:
577 		case VK_FORMAT_A8B8G8R8_SNORM_PACK32:
578 		case VK_FORMAT_R8G8B8A8_SNORM:
579 		case VK_FORMAT_R8G8B8A8_SSCALED:
580 		case VK_FORMAT_A8B8G8R8_SSCALED_PACK32:
581 			if(writeA) { *Pointer<SByte>(element + 3) = SByte(RoundInt(Float(c.w))); }
582 		case VK_FORMAT_R8G8B8_SINT:
583 		case VK_FORMAT_R8G8B8_SNORM:
584 		case VK_FORMAT_R8G8B8_SSCALED:
585 		case VK_FORMAT_R8G8B8_SRGB:
586 			if(writeB) { *Pointer<SByte>(element + 2) = SByte(RoundInt(Float(c.z))); }
587 		case VK_FORMAT_R8G8_SINT:
588 		case VK_FORMAT_R8G8_SNORM:
589 		case VK_FORMAT_R8G8_SSCALED:
590 		case VK_FORMAT_R8G8_SRGB:
591 			if(writeG) { *Pointer<SByte>(element + 1) = SByte(RoundInt(Float(c.y))); }
592 		case VK_FORMAT_R8_SINT:
593 		case VK_FORMAT_R8_SNORM:
594 		case VK_FORMAT_R8_SSCALED:
595 		case VK_FORMAT_R8_SRGB:
596 			if(writeR) { *Pointer<SByte>(element) = SByte(RoundInt(Float(c.x))); }
597 			break;
598 		case VK_FORMAT_R8G8B8_UINT:
599 		case VK_FORMAT_R8G8B8_UNORM:
600 		case VK_FORMAT_R8G8B8_USCALED:
601 			if(writeB) { *Pointer<Byte>(element + 2) = Byte(RoundInt(Float(c.z))); }
602 		case VK_FORMAT_R8G8_UINT:
603 		case VK_FORMAT_R8G8_UNORM:
604 		case VK_FORMAT_R8G8_USCALED:
605 			if(writeG) { *Pointer<Byte>(element + 1) = Byte(RoundInt(Float(c.y))); }
606 		case VK_FORMAT_R8_UINT:
607 		case VK_FORMAT_R8_UNORM:
608 		case VK_FORMAT_R8_USCALED:
609 			if(writeR) { *Pointer<Byte>(element) = Byte(RoundInt(Float(c.x))); }
610 			break;
611 		case VK_FORMAT_R16G16B16A16_SINT:
612 		case VK_FORMAT_R16G16B16A16_SNORM:
613 		case VK_FORMAT_R16G16B16A16_SSCALED:
614 			if(writeRGBA)
615 			{
616 				*Pointer<Short4>(element) = Short4(RoundInt(c));
617 			}
618 			else
619 			{
620 				if(writeR) { *Pointer<Short>(element) = Short(RoundInt(Float(c.x))); }
621 				if(writeG) { *Pointer<Short>(element + 2) = Short(RoundInt(Float(c.y))); }
622 				if(writeB) { *Pointer<Short>(element + 4) = Short(RoundInt(Float(c.z))); }
623 				if(writeA) { *Pointer<Short>(element + 6) = Short(RoundInt(Float(c.w))); }
624 			}
625 			break;
626 		case VK_FORMAT_R16G16B16_SINT:
627 		case VK_FORMAT_R16G16B16_SNORM:
628 		case VK_FORMAT_R16G16B16_SSCALED:
629 			if(writeR) { *Pointer<Short>(element) = Short(RoundInt(Float(c.x))); }
630 			if(writeG) { *Pointer<Short>(element + 2) = Short(RoundInt(Float(c.y))); }
631 			if(writeB) { *Pointer<Short>(element + 4) = Short(RoundInt(Float(c.z))); }
632 			break;
633 		case VK_FORMAT_R16G16_SINT:
634 		case VK_FORMAT_R16G16_SNORM:
635 		case VK_FORMAT_R16G16_SSCALED:
636 			if(writeR && writeG)
637 			{
638 				*Pointer<Short2>(element) = Short2(Short4(RoundInt(c)));
639 			}
640 			else
641 			{
642 				if(writeR) { *Pointer<Short>(element) = Short(RoundInt(Float(c.x))); }
643 				if(writeG) { *Pointer<Short>(element + 2) = Short(RoundInt(Float(c.y))); }
644 			}
645 			break;
646 		case VK_FORMAT_R16_SINT:
647 		case VK_FORMAT_R16_SNORM:
648 		case VK_FORMAT_R16_SSCALED:
649 			if(writeR) { *Pointer<Short>(element) = Short(RoundInt(Float(c.x))); }
650 			break;
651 		case VK_FORMAT_R16G16B16A16_UINT:
652 		case VK_FORMAT_R16G16B16A16_UNORM:
653 		case VK_FORMAT_R16G16B16A16_USCALED:
654 			if(writeRGBA)
655 			{
656 				*Pointer<UShort4>(element) = UShort4(RoundInt(c));
657 			}
658 			else
659 			{
660 				if(writeR) { *Pointer<UShort>(element) = UShort(RoundInt(Float(c.x))); }
661 				if(writeG) { *Pointer<UShort>(element + 2) = UShort(RoundInt(Float(c.y))); }
662 				if(writeB) { *Pointer<UShort>(element + 4) = UShort(RoundInt(Float(c.z))); }
663 				if(writeA) { *Pointer<UShort>(element + 6) = UShort(RoundInt(Float(c.w))); }
664 			}
665 			break;
666 		case VK_FORMAT_R16G16B16_UINT:
667 		case VK_FORMAT_R16G16B16_UNORM:
668 		case VK_FORMAT_R16G16B16_USCALED:
669 			if(writeR) { *Pointer<UShort>(element) = UShort(RoundInt(Float(c.x))); }
670 			if(writeG) { *Pointer<UShort>(element + 2) = UShort(RoundInt(Float(c.y))); }
671 			if(writeB) { *Pointer<UShort>(element + 4) = UShort(RoundInt(Float(c.z))); }
672 			break;
673 		case VK_FORMAT_R16G16_UINT:
674 		case VK_FORMAT_R16G16_UNORM:
675 		case VK_FORMAT_R16G16_USCALED:
676 			if(writeR && writeG)
677 			{
678 				*Pointer<UShort2>(element) = UShort2(UShort4(RoundInt(c)));
679 			}
680 			else
681 			{
682 				if(writeR) { *Pointer<UShort>(element) = UShort(RoundInt(Float(c.x))); }
683 				if(writeG) { *Pointer<UShort>(element + 2) = UShort(RoundInt(Float(c.y))); }
684 			}
685 			break;
686 		case VK_FORMAT_R16_UINT:
687 		case VK_FORMAT_R16_UNORM:
688 		case VK_FORMAT_R16_USCALED:
689 			if(writeR) { *Pointer<UShort>(element) = UShort(RoundInt(Float(c.x))); }
690 			break;
691 		case VK_FORMAT_R32G32B32A32_SINT:
692 			if(writeRGBA)
693 			{
694 				*Pointer<Int4>(element) = RoundInt(c);
695 			}
696 			else
697 			{
698 				if(writeR) { *Pointer<Int>(element) = RoundInt(Float(c.x)); }
699 				if(writeG) { *Pointer<Int>(element + 4) = RoundInt(Float(c.y)); }
700 				if(writeB) { *Pointer<Int>(element + 8) = RoundInt(Float(c.z)); }
701 				if(writeA) { *Pointer<Int>(element + 12) = RoundInt(Float(c.w)); }
702 			}
703 			break;
704 		case VK_FORMAT_R32G32B32_SINT:
705 			if(writeB) { *Pointer<Int>(element + 8) = RoundInt(Float(c.z)); }
706 		case VK_FORMAT_R32G32_SINT:
707 			if(writeG) { *Pointer<Int>(element + 4) = RoundInt(Float(c.y)); }
708 		case VK_FORMAT_R32_SINT:
709 			if(writeR) { *Pointer<Int>(element) = RoundInt(Float(c.x)); }
710 			break;
711 		case VK_FORMAT_R32G32B32A32_UINT:
712 			if(writeRGBA)
713 			{
714 				*Pointer<UInt4>(element) = UInt4(RoundInt(c));
715 			}
716 			else
717 			{
718 				if(writeR) { *Pointer<UInt>(element) = As<UInt>(RoundInt(Float(c.x))); }
719 				if(writeG) { *Pointer<UInt>(element + 4) = As<UInt>(RoundInt(Float(c.y))); }
720 				if(writeB) { *Pointer<UInt>(element + 8) = As<UInt>(RoundInt(Float(c.z))); }
721 				if(writeA) { *Pointer<UInt>(element + 12) = As<UInt>(RoundInt(Float(c.w))); }
722 			}
723 			break;
724 		case VK_FORMAT_R32G32B32_UINT:
725 			if(writeB) { *Pointer<UInt>(element + 8) = As<UInt>(RoundInt(Float(c.z))); }
726 		case VK_FORMAT_R32G32_UINT:
727 			if(writeG) { *Pointer<UInt>(element + 4) = As<UInt>(RoundInt(Float(c.y))); }
728 		case VK_FORMAT_R32_UINT:
729 			if(writeR) { *Pointer<UInt>(element) = As<UInt>(RoundInt(Float(c.x))); }
730 			break;
731 		case VK_FORMAT_R5G6B5_UNORM_PACK16:
732 			if(writeR && writeG && writeB)
733 			{
734 				*Pointer<UShort>(element) = UShort(RoundInt(Float(c.z)) |
735 				                                  (RoundInt(Float(c.y)) << Int(5)) |
736 				                                  (RoundInt(Float(c.x)) << Int(11)));
737 			}
738 			else
739 			{
740 				unsigned short mask = (writeB ? 0x001F : 0x0000) | (writeG ? 0x07E0 : 0x0000) | (writeR ? 0xF800 : 0x0000);
741 				unsigned short unmask = ~mask;
742 				*Pointer<UShort>(element) = (*Pointer<UShort>(element) & UShort(unmask)) |
743 				                            (UShort(RoundInt(Float(c.z)) |
744 				                                   (RoundInt(Float(c.y)) << Int(5)) |
745 				                                   (RoundInt(Float(c.x)) << Int(11))) & UShort(mask));
746 			}
747 			break;
748 		case VK_FORMAT_R5G5B5A1_UNORM_PACK16:
749 			if(writeRGBA)
750 			{
751 				*Pointer<UShort>(element) = UShort(RoundInt(Float(c.w)) |
752 				                                  (RoundInt(Float(c.z)) << Int(1)) |
753 				                                  (RoundInt(Float(c.y)) << Int(6)) |
754 				                                  (RoundInt(Float(c.x)) << Int(11)));
755 			}
756 			else
757 			{
758 				unsigned short mask = (writeA ? 0x8000 : 0x0000) |
759 				                      (writeR ? 0x7C00 : 0x0000) |
760 				                      (writeG ? 0x03E0 : 0x0000) |
761 				                      (writeB ? 0x001F : 0x0000);
762 				unsigned short unmask = ~mask;
763 				*Pointer<UShort>(element) = (*Pointer<UShort>(element) & UShort(unmask)) |
764 				                            (UShort(RoundInt(Float(c.w)) |
765 				                                   (RoundInt(Float(c.z)) << Int(1)) |
766 				                                   (RoundInt(Float(c.y)) << Int(6)) |
767 				                                   (RoundInt(Float(c.x)) << Int(11))) & UShort(mask));
768 			}
769 			break;
770 		case VK_FORMAT_B5G5R5A1_UNORM_PACK16:
771 			if(writeRGBA)
772 			{
773 				*Pointer<UShort>(element) = UShort(RoundInt(Float(c.w)) |
774 				                                  (RoundInt(Float(c.x)) << Int(1)) |
775 				                                  (RoundInt(Float(c.y)) << Int(6)) |
776 				                                  (RoundInt(Float(c.z)) << Int(11)));
777 			}
778 			else
779 			{
780 				unsigned short mask = (writeA ? 0x8000 : 0x0000) |
781 				                      (writeR ? 0x7C00 : 0x0000) |
782 				                      (writeG ? 0x03E0 : 0x0000) |
783 				                      (writeB ? 0x001F : 0x0000);
784 				unsigned short unmask = ~mask;
785 				*Pointer<UShort>(element) = (*Pointer<UShort>(element) & UShort(unmask)) |
786 				                            (UShort(RoundInt(Float(c.w)) |
787 				                                   (RoundInt(Float(c.x)) << Int(1)) |
788 				                                   (RoundInt(Float(c.y)) << Int(6)) |
789 				                                   (RoundInt(Float(c.z)) << Int(11))) & UShort(mask));
790 			}
791 			break;
792 		case VK_FORMAT_A1R5G5B5_UNORM_PACK16:
793 			if(writeRGBA)
794 			{
795 				*Pointer<UShort>(element) = UShort(RoundInt(Float(c.z)) |
796 				                                  (RoundInt(Float(c.y)) << Int(5)) |
797 				                                  (RoundInt(Float(c.x)) << Int(10)) |
798 				                                  (RoundInt(Float(c.w)) << Int(15)));
799 			}
800 			else
801 			{
802 				unsigned short mask = (writeA ? 0x8000 : 0x0000) |
803 				                      (writeR ? 0x7C00 : 0x0000) |
804 				                      (writeG ? 0x03E0 : 0x0000) |
805 				                      (writeB ? 0x001F : 0x0000);
806 				unsigned short unmask = ~mask;
807 				*Pointer<UShort>(element) = (*Pointer<UShort>(element) & UShort(unmask)) |
808 				                            (UShort(RoundInt(Float(c.z)) |
809 				                                   (RoundInt(Float(c.y)) << Int(5)) |
810 				                                   (RoundInt(Float(c.x)) << Int(10)) |
811 				                                   (RoundInt(Float(c.w)) << Int(15))) & UShort(mask));
812 			}
813 			break;
814 		case VK_FORMAT_A2B10G10R10_UNORM_PACK32:
815 		case VK_FORMAT_A2B10G10R10_UINT_PACK32:
816 		case VK_FORMAT_A2B10G10R10_SNORM_PACK32:
817 			if(writeRGBA)
818 			{
819 				*Pointer<UInt>(element) = UInt(RoundInt(Float(c.x)) |
820 				                              (RoundInt(Float(c.y)) << 10) |
821 				                              (RoundInt(Float(c.z)) << 20) |
822 				                              (RoundInt(Float(c.w)) << 30));
823 			}
824 			else
825 			{
826 				unsigned int mask = (writeA ? 0xC0000000 : 0x0000) |
827 				                    (writeB ? 0x3FF00000 : 0x0000) |
828 				                    (writeG ? 0x000FFC00 : 0x0000) |
829 				                    (writeR ? 0x000003FF : 0x0000);
830 				unsigned int unmask = ~mask;
831 				*Pointer<UInt>(element) = (*Pointer<UInt>(element) & UInt(unmask)) |
832 				                            (UInt(RoundInt(Float(c.x)) |
833 				                                 (RoundInt(Float(c.y)) << 10) |
834 				                                 (RoundInt(Float(c.z)) << 20) |
835 				                                 (RoundInt(Float(c.w)) << 30)) & UInt(mask));
836 			}
837 			break;
838 		case VK_FORMAT_A2R10G10B10_UNORM_PACK32:
839 		case VK_FORMAT_A2R10G10B10_UINT_PACK32:
840 		case VK_FORMAT_A2R10G10B10_SNORM_PACK32:
841 			if(writeRGBA)
842 			{
843 				*Pointer<UInt>(element) = UInt(RoundInt(Float(c.z)) |
844 				                              (RoundInt(Float(c.y)) << 10) |
845 				                              (RoundInt(Float(c.x)) << 20) |
846 				                              (RoundInt(Float(c.w)) << 30));
847 			}
848 			else
849 			{
850 				unsigned int mask = (writeA ? 0xC0000000 : 0x0000) |
851 				                    (writeR ? 0x3FF00000 : 0x0000) |
852 				                    (writeG ? 0x000FFC00 : 0x0000) |
853 				                    (writeB ? 0x000003FF : 0x0000);
854 				unsigned int unmask = ~mask;
855 				*Pointer<UInt>(element) = (*Pointer<UInt>(element) & UInt(unmask)) |
856 				                            (UInt(RoundInt(Float(c.z)) |
857 				                                 (RoundInt(Float(c.y)) << 10) |
858 				                                 (RoundInt(Float(c.x)) << 20) |
859 				                                 (RoundInt(Float(c.w)) << 30)) & UInt(mask));
860 			}
861 			break;
862 		case VK_FORMAT_D16_UNORM:
863 			*Pointer<UShort>(element) = UShort(RoundInt(Float(c.x)));
864 			break;
865 		case VK_FORMAT_D24_UNORM_S8_UINT:
866 		case VK_FORMAT_X8_D24_UNORM_PACK32:
867 			*Pointer<UInt>(element) = UInt(RoundInt(Float(c.x)) << 8);
868 			break;
869 		case VK_FORMAT_D32_SFLOAT:
870 		case VK_FORMAT_D32_SFLOAT_S8_UINT:
871 			*Pointer<Float>(element) = c.x;
872 			break;
873 		case VK_FORMAT_S8_UINT:
874 			*Pointer<Byte>(element) = Byte(RoundInt(Float(c.x)));
875 			break;
876 		default:
877 			return false;
878 		}
879 		return true;
880 	}
881 
read(Int4 & c,Pointer<Byte> element,const State & state)882 	bool Blitter::read(Int4 &c, Pointer<Byte> element, const State &state)
883 	{
884 		c = Int4(0, 0, 0, 1);
885 
886 		switch(state.sourceFormat)
887 		{
888 		case VK_FORMAT_A8B8G8R8_SINT_PACK32:
889 		case VK_FORMAT_R8G8B8A8_SINT:
890 			c = Insert(c, Int(*Pointer<SByte>(element + 3)), 3);
891 			c = Insert(c, Int(*Pointer<SByte>(element + 2)), 2);
892 		case VK_FORMAT_R8G8_SINT:
893 			c = Insert(c, Int(*Pointer<SByte>(element + 1)), 1);
894 		case VK_FORMAT_R8_SINT:
895 			c = Insert(c, Int(*Pointer<SByte>(element)), 0);
896 			break;
897 		case VK_FORMAT_A2B10G10R10_UINT_PACK32:
898 			c = Insert(c, Int((*Pointer<UInt>(element) & UInt(0x000003FF))), 0);
899 			c = Insert(c, Int((*Pointer<UInt>(element) & UInt(0x000FFC00)) >> 10), 1);
900 			c = Insert(c, Int((*Pointer<UInt>(element) & UInt(0x3FF00000)) >> 20), 2);
901 			c = Insert(c, Int((*Pointer<UInt>(element) & UInt(0xC0000000)) >> 30), 3);
902 			break;
903 		case VK_FORMAT_A8B8G8R8_UINT_PACK32:
904 		case VK_FORMAT_R8G8B8A8_UINT:
905 			c = Insert(c, Int(*Pointer<Byte>(element + 3)), 3);
906 			c = Insert(c, Int(*Pointer<Byte>(element + 2)), 2);
907 		case VK_FORMAT_R8G8_UINT:
908 			c = Insert(c, Int(*Pointer<Byte>(element + 1)), 1);
909 		case VK_FORMAT_R8_UINT:
910 			c = Insert(c, Int(*Pointer<Byte>(element)), 0);
911 			break;
912 		case VK_FORMAT_R16G16B16A16_SINT:
913 			c = Insert(c, Int(*Pointer<Short>(element + 6)), 3);
914 			c = Insert(c, Int(*Pointer<Short>(element + 4)), 2);
915 		case VK_FORMAT_R16G16_SINT:
916 			c = Insert(c, Int(*Pointer<Short>(element + 2)), 1);
917 		case VK_FORMAT_R16_SINT:
918 			c = Insert(c, Int(*Pointer<Short>(element)), 0);
919 			break;
920 		case VK_FORMAT_R16G16B16A16_UINT:
921 			c = Insert(c, Int(*Pointer<UShort>(element + 6)), 3);
922 			c = Insert(c, Int(*Pointer<UShort>(element + 4)), 2);
923 		case VK_FORMAT_R16G16_UINT:
924 			c = Insert(c, Int(*Pointer<UShort>(element + 2)), 1);
925 		case VK_FORMAT_R16_UINT:
926 			c = Insert(c, Int(*Pointer<UShort>(element)), 0);
927 			break;
928 		case VK_FORMAT_R32G32B32A32_SINT:
929 		case VK_FORMAT_R32G32B32A32_UINT:
930 			c = *Pointer<Int4>(element);
931 			break;
932 		case VK_FORMAT_R32G32_SINT:
933 		case VK_FORMAT_R32G32_UINT:
934 			c = Insert(c, *Pointer<Int>(element + 4), 1);
935 		case VK_FORMAT_R32_SINT:
936 		case VK_FORMAT_R32_UINT:
937 			c = Insert(c, *Pointer<Int>(element), 0);
938 			break;
939 		default:
940 			return false;
941 		}
942 
943 		return true;
944 	}
945 
write(Int4 & c,Pointer<Byte> element,const State & state)946 	bool Blitter::write(Int4 &c, Pointer<Byte> element, const State &state)
947 	{
948 		bool writeR = state.writeRed;
949 		bool writeG = state.writeGreen;
950 		bool writeB = state.writeBlue;
951 		bool writeA = state.writeAlpha;
952 		bool writeRGBA = writeR && writeG && writeB && writeA;
953 
954 		switch(state.destFormat)
955 		{
956 		case VK_FORMAT_A2B10G10R10_UINT_PACK32:
957 			c = Min(As<UInt4>(c), UInt4(0x03FF, 0x03FF, 0x03FF, 0x0003));
958 			break;
959 		case VK_FORMAT_A8B8G8R8_UINT_PACK32:
960 		case VK_FORMAT_R8G8B8A8_UINT:
961 		case VK_FORMAT_R8G8B8_UINT:
962 		case VK_FORMAT_R8G8_UINT:
963 		case VK_FORMAT_R8_UINT:
964 		case VK_FORMAT_R8G8B8A8_USCALED:
965 		case VK_FORMAT_R8G8B8_USCALED:
966 		case VK_FORMAT_R8G8_USCALED:
967 		case VK_FORMAT_R8_USCALED:
968 			c = Min(As<UInt4>(c), UInt4(0xFF));
969 			break;
970 		case VK_FORMAT_R16G16B16A16_UINT:
971 		case VK_FORMAT_R16G16B16_UINT:
972 		case VK_FORMAT_R16G16_UINT:
973 		case VK_FORMAT_R16_UINT:
974 		case VK_FORMAT_R16G16B16A16_USCALED:
975 		case VK_FORMAT_R16G16B16_USCALED:
976 		case VK_FORMAT_R16G16_USCALED:
977 		case VK_FORMAT_R16_USCALED:
978 			c = Min(As<UInt4>(c), UInt4(0xFFFF));
979 			break;
980 		case VK_FORMAT_A8B8G8R8_SINT_PACK32:
981 		case VK_FORMAT_R8G8B8A8_SINT:
982 		case VK_FORMAT_R8G8_SINT:
983 		case VK_FORMAT_R8_SINT:
984 		case VK_FORMAT_R8G8B8A8_SSCALED:
985 		case VK_FORMAT_R8G8B8_SSCALED:
986 		case VK_FORMAT_R8G8_SSCALED:
987 		case VK_FORMAT_R8_SSCALED:
988 			c = Min(Max(c, Int4(-0x80)), Int4(0x7F));
989 			break;
990 		case VK_FORMAT_R16G16B16A16_SINT:
991 		case VK_FORMAT_R16G16B16_SINT:
992 		case VK_FORMAT_R16G16_SINT:
993 		case VK_FORMAT_R16_SINT:
994 		case VK_FORMAT_R16G16B16A16_SSCALED:
995 		case VK_FORMAT_R16G16B16_SSCALED:
996 		case VK_FORMAT_R16G16_SSCALED:
997 		case VK_FORMAT_R16_SSCALED:
998 			c = Min(Max(c, Int4(-0x8000)), Int4(0x7FFF));
999 			break;
1000 		default:
1001 			break;
1002 		}
1003 
1004 		switch(state.destFormat)
1005 		{
1006 		case VK_FORMAT_B8G8R8A8_SINT:
1007 		case VK_FORMAT_B8G8R8A8_SSCALED:
1008 			if(writeA) { *Pointer<SByte>(element + 3) = SByte(Extract(c, 3)); }
1009 		case VK_FORMAT_B8G8R8_SINT:
1010 		case VK_FORMAT_B8G8R8_SRGB:
1011 		case VK_FORMAT_B8G8R8_SSCALED:
1012 			if(writeB) { *Pointer<SByte>(element) = SByte(Extract(c, 2)); }
1013 			if(writeG) { *Pointer<SByte>(element + 1) = SByte(Extract(c, 1)); }
1014 			if(writeR) { *Pointer<SByte>(element + 2) = SByte(Extract(c, 0)); }
1015 			break;
1016 		case VK_FORMAT_A8B8G8R8_SINT_PACK32:
1017 		case VK_FORMAT_R8G8B8A8_SINT:
1018 		case VK_FORMAT_R8G8B8A8_SSCALED:
1019 		case VK_FORMAT_A8B8G8R8_SSCALED_PACK32:
1020 			if(writeA) { *Pointer<SByte>(element + 3) = SByte(Extract(c, 3)); }
1021 		case VK_FORMAT_R8G8B8_SINT:
1022 		case VK_FORMAT_R8G8B8_SSCALED:
1023 			if(writeB) { *Pointer<SByte>(element + 2) = SByte(Extract(c, 2)); }
1024 		case VK_FORMAT_R8G8_SINT:
1025 		case VK_FORMAT_R8G8_SSCALED:
1026 			if(writeG) { *Pointer<SByte>(element + 1) = SByte(Extract(c, 1)); }
1027 		case VK_FORMAT_R8_SINT:
1028 		case VK_FORMAT_R8_SSCALED:
1029 			if(writeR) { *Pointer<SByte>(element) = SByte(Extract(c, 0)); }
1030 			break;
1031 		case VK_FORMAT_A2B10G10R10_UINT_PACK32:
1032 		case VK_FORMAT_A2B10G10R10_SINT_PACK32:
1033 		case VK_FORMAT_A2B10G10R10_USCALED_PACK32:
1034 		case VK_FORMAT_A2B10G10R10_SSCALED_PACK32:
1035 			if(writeRGBA)
1036 			{
1037 				*Pointer<UInt>(element) =
1038 					UInt((Extract(c, 0)) | (Extract(c, 1) << 10) | (Extract(c, 2) << 20) | (Extract(c, 3) << 30));
1039 			}
1040 			else
1041 			{
1042 				unsigned int mask = (writeA ? 0xC0000000 : 0x0000) |
1043 				                    (writeB ? 0x3FF00000 : 0x0000) |
1044 				                    (writeG ? 0x000FFC00 : 0x0000) |
1045 				                    (writeR ? 0x000003FF : 0x0000);
1046 				unsigned int unmask = ~mask;
1047 				*Pointer<UInt>(element) = (*Pointer<UInt>(element) & UInt(unmask)) |
1048 					(UInt(Extract(c, 0) | (Extract(c, 1) << 10) | (Extract(c, 2) << 20) | (Extract(c, 3) << 30)) & UInt(mask));
1049 			}
1050 			break;
1051 		case VK_FORMAT_A2R10G10B10_UINT_PACK32:
1052 		case VK_FORMAT_A2R10G10B10_SINT_PACK32:
1053 		case VK_FORMAT_A2R10G10B10_USCALED_PACK32:
1054 		case VK_FORMAT_A2R10G10B10_SSCALED_PACK32:
1055 			if(writeRGBA)
1056 			{
1057 				*Pointer<UInt>(element) =
1058 					UInt((Extract(c, 2)) | (Extract(c, 1) << 10) | (Extract(c, 0) << 20) | (Extract(c, 3) << 30));
1059 			}
1060 			else
1061 			{
1062 				unsigned int mask = (writeA ? 0xC0000000 : 0x0000) |
1063 				                    (writeR ? 0x3FF00000 : 0x0000) |
1064 				                    (writeG ? 0x000FFC00 : 0x0000) |
1065 				                    (writeB ? 0x000003FF : 0x0000);
1066 				unsigned int unmask = ~mask;
1067 				*Pointer<UInt>(element) = (*Pointer<UInt>(element) & UInt(unmask)) |
1068 					(UInt(Extract(c, 2) | (Extract(c, 1) << 10) | (Extract(c, 0) << 20) | (Extract(c, 3) << 30)) & UInt(mask));
1069 			}
1070 			break;
1071 		case VK_FORMAT_B8G8R8A8_UINT:
1072 		case VK_FORMAT_B8G8R8A8_USCALED:
1073 			if(writeA) { *Pointer<Byte>(element + 3) = Byte(Extract(c, 3)); }
1074 		case VK_FORMAT_B8G8R8_UINT:
1075 		case VK_FORMAT_B8G8R8_USCALED:
1076 			if(writeB) { *Pointer<Byte>(element) = Byte(Extract(c, 2)); }
1077 			if(writeG) { *Pointer<Byte>(element + 1) = Byte(Extract(c, 1)); }
1078 			if(writeR) { *Pointer<Byte>(element + 2) = Byte(Extract(c, 0)); }
1079 			break;
1080 		case VK_FORMAT_A8B8G8R8_UINT_PACK32:
1081 		case VK_FORMAT_R8G8B8A8_UINT:
1082 		case VK_FORMAT_R8G8B8A8_USCALED:
1083 		case VK_FORMAT_A8B8G8R8_USCALED_PACK32:
1084 			if(writeA) { *Pointer<Byte>(element + 3) = Byte(Extract(c, 3)); }
1085 		case VK_FORMAT_R8G8B8_UINT:
1086 		case VK_FORMAT_R8G8B8_USCALED:
1087 			if(writeB) { *Pointer<Byte>(element + 2) = Byte(Extract(c, 2)); }
1088 		case VK_FORMAT_R8G8_UINT:
1089 		case VK_FORMAT_R8G8_USCALED:
1090 			if(writeG) { *Pointer<Byte>(element + 1) = Byte(Extract(c, 1)); }
1091 		case VK_FORMAT_R8_UINT:
1092 		case VK_FORMAT_R8_USCALED:
1093 			if(writeR) { *Pointer<Byte>(element) = Byte(Extract(c, 0)); }
1094 			break;
1095 		case VK_FORMAT_R16G16B16A16_SINT:
1096 		case VK_FORMAT_R16G16B16A16_SSCALED:
1097 			if(writeA) { *Pointer<Short>(element + 6) = Short(Extract(c, 3)); }
1098 		case VK_FORMAT_R16G16B16_SINT:
1099 		case VK_FORMAT_R16G16B16_SSCALED:
1100 			if(writeB) { *Pointer<Short>(element + 4) = Short(Extract(c, 2)); }
1101 		case VK_FORMAT_R16G16_SINT:
1102 		case VK_FORMAT_R16G16_SSCALED:
1103 			if(writeG) { *Pointer<Short>(element + 2) = Short(Extract(c, 1)); }
1104 		case VK_FORMAT_R16_SINT:
1105 		case VK_FORMAT_R16_SSCALED:
1106 			if(writeR) { *Pointer<Short>(element) = Short(Extract(c, 0)); }
1107 			break;
1108 		case VK_FORMAT_R16G16B16A16_UINT:
1109 		case VK_FORMAT_R16G16B16A16_USCALED:
1110 			if(writeA) { *Pointer<UShort>(element + 6) = UShort(Extract(c, 3)); }
1111 		case VK_FORMAT_R16G16B16_UINT:
1112 		case VK_FORMAT_R16G16B16_USCALED:
1113 			if(writeB) { *Pointer<UShort>(element + 4) = UShort(Extract(c, 2)); }
1114 		case VK_FORMAT_R16G16_UINT:
1115 		case VK_FORMAT_R16G16_USCALED:
1116 			if(writeG) { *Pointer<UShort>(element + 2) = UShort(Extract(c, 1)); }
1117 		case VK_FORMAT_R16_UINT:
1118 		case VK_FORMAT_R16_USCALED:
1119 			if(writeR) { *Pointer<UShort>(element) = UShort(Extract(c, 0)); }
1120 			break;
1121 		case VK_FORMAT_R32G32B32A32_SINT:
1122 			if(writeRGBA)
1123 			{
1124 				*Pointer<Int4>(element) = c;
1125 			}
1126 			else
1127 			{
1128 				if(writeR) { *Pointer<Int>(element) = Extract(c, 0); }
1129 				if(writeG) { *Pointer<Int>(element + 4) = Extract(c, 1); }
1130 				if(writeB) { *Pointer<Int>(element + 8) = Extract(c, 2); }
1131 				if(writeA) { *Pointer<Int>(element + 12) = Extract(c, 3); }
1132 			}
1133 			break;
1134 		case VK_FORMAT_R32G32B32_SINT:
1135 			if(writeR) { *Pointer<Int>(element) = Extract(c, 0); }
1136 			if(writeG) { *Pointer<Int>(element + 4) = Extract(c, 1); }
1137 			if(writeB) { *Pointer<Int>(element + 8) = Extract(c, 2); }
1138 			break;
1139 		case VK_FORMAT_R32G32_SINT:
1140 			if(writeR) { *Pointer<Int>(element) = Extract(c, 0); }
1141 			if(writeG) { *Pointer<Int>(element + 4) = Extract(c, 1); }
1142 			break;
1143 		case VK_FORMAT_R32_SINT:
1144 			if(writeR) { *Pointer<Int>(element) = Extract(c, 0); }
1145 			break;
1146 		case VK_FORMAT_R32G32B32A32_UINT:
1147 			if(writeRGBA)
1148 			{
1149 				*Pointer<UInt4>(element) = As<UInt4>(c);
1150 			}
1151 			else
1152 			{
1153 				if(writeR) { *Pointer<UInt>(element) = As<UInt>(Extract(c, 0)); }
1154 				if(writeG) { *Pointer<UInt>(element + 4) = As<UInt>(Extract(c, 1)); }
1155 				if(writeB) { *Pointer<UInt>(element + 8) = As<UInt>(Extract(c, 2)); }
1156 				if(writeA) { *Pointer<UInt>(element + 12) = As<UInt>(Extract(c, 3)); }
1157 			}
1158 			break;
1159 		case VK_FORMAT_R32G32B32_UINT:
1160 			if(writeB) { *Pointer<UInt>(element + 8) = As<UInt>(Extract(c, 2)); }
1161 		case VK_FORMAT_R32G32_UINT:
1162 			if(writeG) { *Pointer<UInt>(element + 4) = As<UInt>(Extract(c, 1)); }
1163 		case VK_FORMAT_R32_UINT:
1164 			if(writeR) { *Pointer<UInt>(element) = As<UInt>(Extract(c, 0)); }
1165 			break;
1166 		default:
1167 			return false;
1168 		}
1169 
1170 		return true;
1171 	}
1172 
GetScale(float4 & scale,VkFormat format)1173 	bool Blitter::GetScale(float4 &scale, VkFormat format)
1174 	{
1175 		switch(format)
1176 		{
1177 		case VK_FORMAT_R4G4_UNORM_PACK8:
1178 		case VK_FORMAT_R4G4B4A4_UNORM_PACK16:
1179 		case VK_FORMAT_B4G4R4A4_UNORM_PACK16:
1180 			scale = vector(0xF, 0xF, 0xF, 0xF);
1181 			break;
1182 		case VK_FORMAT_R8_UNORM:
1183 		case VK_FORMAT_R8G8_UNORM:
1184 		case VK_FORMAT_R8G8B8_UNORM:
1185 		case VK_FORMAT_B8G8R8_UNORM:
1186 		case VK_FORMAT_A8B8G8R8_UNORM_PACK32:
1187 		case VK_FORMAT_R8G8B8A8_UNORM:
1188 		case VK_FORMAT_B8G8R8A8_UNORM:
1189 		case VK_FORMAT_R8_SRGB:
1190 		case VK_FORMAT_R8G8_SRGB:
1191 		case VK_FORMAT_R8G8B8_SRGB:
1192 		case VK_FORMAT_B8G8R8_SRGB:
1193 		case VK_FORMAT_A8B8G8R8_SRGB_PACK32:
1194 		case VK_FORMAT_R8G8B8A8_SRGB:
1195 		case VK_FORMAT_B8G8R8A8_SRGB:
1196 			scale = vector(0xFF, 0xFF, 0xFF, 0xFF);
1197 			break;
1198 		case VK_FORMAT_R8_SNORM:
1199 		case VK_FORMAT_R8G8_SNORM:
1200 		case VK_FORMAT_R8G8B8_SNORM:
1201 		case VK_FORMAT_B8G8R8_SNORM:
1202 		case VK_FORMAT_A8B8G8R8_SNORM_PACK32:
1203 		case VK_FORMAT_R8G8B8A8_SNORM:
1204 		case VK_FORMAT_B8G8R8A8_SNORM:
1205 			scale = vector(0x7F, 0x7F, 0x7F, 0x7F);
1206 			break;
1207 		case VK_FORMAT_R16_UNORM:
1208 		case VK_FORMAT_R16G16_UNORM:
1209 		case VK_FORMAT_R16G16B16_UNORM:
1210 		case VK_FORMAT_R16G16B16A16_UNORM:
1211 			scale = vector(0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF);
1212 			break;
1213 		case VK_FORMAT_R16_SNORM:
1214 		case VK_FORMAT_R16G16_SNORM:
1215 		case VK_FORMAT_R16G16B16_SNORM:
1216 		case VK_FORMAT_R16G16B16A16_SNORM:
1217 			scale = vector(0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF);
1218 			break;
1219 		case VK_FORMAT_R8_SINT:
1220 		case VK_FORMAT_R8_UINT:
1221 		case VK_FORMAT_R8G8_SINT:
1222 		case VK_FORMAT_R8G8_UINT:
1223 		case VK_FORMAT_R8G8B8_SINT:
1224 		case VK_FORMAT_R8G8B8_UINT:
1225 		case VK_FORMAT_B8G8R8_SINT:
1226 		case VK_FORMAT_B8G8R8_UINT:
1227 		case VK_FORMAT_R8G8B8A8_SINT:
1228 		case VK_FORMAT_R8G8B8A8_UINT:
1229 		case VK_FORMAT_A8B8G8R8_SINT_PACK32:
1230 		case VK_FORMAT_A8B8G8R8_UINT_PACK32:
1231 		case VK_FORMAT_B8G8R8A8_SINT:
1232 		case VK_FORMAT_B8G8R8A8_UINT:
1233 		case VK_FORMAT_R8_USCALED:
1234 		case VK_FORMAT_R8G8_USCALED:
1235 		case VK_FORMAT_R8G8B8_USCALED:
1236 		case VK_FORMAT_B8G8R8_USCALED:
1237 		case VK_FORMAT_R8G8B8A8_USCALED:
1238 		case VK_FORMAT_B8G8R8A8_USCALED:
1239 		case VK_FORMAT_A8B8G8R8_USCALED_PACK32:
1240 		case VK_FORMAT_R8_SSCALED:
1241 		case VK_FORMAT_R8G8_SSCALED:
1242 		case VK_FORMAT_R8G8B8_SSCALED:
1243 		case VK_FORMAT_B8G8R8_SSCALED:
1244 		case VK_FORMAT_R8G8B8A8_SSCALED:
1245 		case VK_FORMAT_B8G8R8A8_SSCALED:
1246 		case VK_FORMAT_A8B8G8R8_SSCALED_PACK32:
1247 		case VK_FORMAT_R16_SINT:
1248 		case VK_FORMAT_R16_UINT:
1249 		case VK_FORMAT_R16G16_SINT:
1250 		case VK_FORMAT_R16G16_UINT:
1251 		case VK_FORMAT_R16G16B16A16_SINT:
1252 		case VK_FORMAT_R16G16B16A16_UINT:
1253 		case VK_FORMAT_R16_SSCALED:
1254 		case VK_FORMAT_R16G16_SSCALED:
1255 		case VK_FORMAT_R16G16B16_SSCALED:
1256 		case VK_FORMAT_R16G16B16A16_SSCALED:
1257 		case VK_FORMAT_R16_USCALED:
1258 		case VK_FORMAT_R16G16_USCALED:
1259 		case VK_FORMAT_R16G16B16_USCALED:
1260 		case VK_FORMAT_R16G16B16A16_USCALED:
1261 		case VK_FORMAT_R32_SINT:
1262 		case VK_FORMAT_R32_UINT:
1263 		case VK_FORMAT_R32G32_SINT:
1264 		case VK_FORMAT_R32G32_UINT:
1265 		case VK_FORMAT_R32G32B32_SINT:
1266 		case VK_FORMAT_R32G32B32_UINT:
1267 		case VK_FORMAT_R32G32B32A32_SINT:
1268 		case VK_FORMAT_R32G32B32A32_UINT:
1269 		case VK_FORMAT_R32G32B32A32_SFLOAT:
1270 		case VK_FORMAT_R32G32B32_SFLOAT:
1271 		case VK_FORMAT_R32G32_SFLOAT:
1272 		case VK_FORMAT_R32_SFLOAT:
1273 		case VK_FORMAT_R16G16B16A16_SFLOAT:
1274 		case VK_FORMAT_R16G16B16_SFLOAT:
1275 		case VK_FORMAT_R16G16_SFLOAT:
1276 		case VK_FORMAT_R16_SFLOAT:
1277 		case VK_FORMAT_B10G11R11_UFLOAT_PACK32:
1278 		case VK_FORMAT_E5B9G9R9_UFLOAT_PACK32:
1279 		case VK_FORMAT_A2R10G10B10_USCALED_PACK32:
1280 		case VK_FORMAT_A2R10G10B10_SSCALED_PACK32:
1281 		case VK_FORMAT_A2R10G10B10_UINT_PACK32:
1282 		case VK_FORMAT_A2R10G10B10_SINT_PACK32:
1283 		case VK_FORMAT_A2B10G10R10_USCALED_PACK32:
1284 		case VK_FORMAT_A2B10G10R10_SSCALED_PACK32:
1285 		case VK_FORMAT_A2B10G10R10_UINT_PACK32:
1286 		case VK_FORMAT_A2B10G10R10_SINT_PACK32:
1287 			scale = vector(1.0f, 1.0f, 1.0f, 1.0f);
1288 			break;
1289 		case VK_FORMAT_R5G5B5A1_UNORM_PACK16:
1290 		case VK_FORMAT_B5G5R5A1_UNORM_PACK16:
1291 		case VK_FORMAT_A1R5G5B5_UNORM_PACK16:
1292 			scale = vector(0x1F, 0x1F, 0x1F, 0x01);
1293 			break;
1294 		case VK_FORMAT_R5G6B5_UNORM_PACK16:
1295 		case VK_FORMAT_B5G6R5_UNORM_PACK16:
1296 			scale = vector(0x1F, 0x3F, 0x1F, 1.0f);
1297 			break;
1298 		case VK_FORMAT_A2R10G10B10_UNORM_PACK32:
1299 		case VK_FORMAT_A2B10G10R10_UNORM_PACK32:
1300 			scale = vector(0x3FF, 0x3FF, 0x3FF, 0x03);
1301 			break;
1302 		case VK_FORMAT_A2R10G10B10_SNORM_PACK32:
1303 		case VK_FORMAT_A2B10G10R10_SNORM_PACK32:
1304 			scale = vector(0x1FF, 0x1FF, 0x1FF, 0x01);
1305 			break;
1306 		case VK_FORMAT_D16_UNORM:
1307 			scale = vector(0xFFFF, 0.0f, 0.0f, 0.0f);
1308 			break;
1309 		case VK_FORMAT_D24_UNORM_S8_UINT:
1310 		case VK_FORMAT_X8_D24_UNORM_PACK32:
1311 			scale = vector(0xFFFFFF, 0.0f, 0.0f, 0.0f);
1312 			break;
1313 		case VK_FORMAT_D32_SFLOAT:
1314 		case VK_FORMAT_D32_SFLOAT_S8_UINT:
1315 		case VK_FORMAT_S8_UINT:
1316 			scale = vector(1.0f, 1.0f, 1.0f, 1.0f);
1317 			break;
1318 		default:
1319 			return false;
1320 		}
1321 
1322 		return true;
1323 	}
1324 
ApplyScaleAndClamp(Float4 & value,const State & state,bool preScaled)1325 	bool Blitter::ApplyScaleAndClamp(Float4 &value, const State &state, bool preScaled)
1326 	{
1327 		float4 scale, unscale;
1328 		if(state.clearOperation &&
1329 		   Surface::isNonNormalizedInteger(state.sourceFormat) &&
1330 		   !Surface::isNonNormalizedInteger(state.destFormat))
1331 		{
1332 			// If we're clearing a buffer from an int or uint color into a normalized color,
1333 			// then the whole range of the int or uint color must be scaled between 0 and 1.
1334 			switch(state.sourceFormat)
1335 			{
1336 			case VK_FORMAT_R32G32B32A32_SINT:
1337 				unscale = replicate(static_cast<float>(0x7FFFFFFF));
1338 				break;
1339 			case VK_FORMAT_R32G32B32A32_UINT:
1340 				unscale = replicate(static_cast<float>(0xFFFFFFFF));
1341 				break;
1342 			default:
1343 				return false;
1344 			}
1345 		}
1346 		else if(!GetScale(unscale, state.sourceFormat))
1347 		{
1348 			return false;
1349 		}
1350 
1351 		if(!GetScale(scale, state.destFormat))
1352 		{
1353 			return false;
1354 		}
1355 
1356 		bool srcSRGB = Surface::isSRGBformat(state.sourceFormat);
1357 		bool dstSRGB = Surface::isSRGBformat(state.destFormat);
1358 
1359 		if(state.convertSRGB && ((srcSRGB && !preScaled) || dstSRGB))   // One of the formats is sRGB encoded.
1360 		{
1361 			value *= preScaled ? Float4(1.0f / scale.x, 1.0f / scale.y, 1.0f / scale.z, 1.0f / scale.w) : // Unapply scale
1362 			                     Float4(1.0f / unscale.x, 1.0f / unscale.y, 1.0f / unscale.z, 1.0f / unscale.w); // Apply unscale
1363 			value = (srcSRGB && !preScaled) ? sRGBtoLinear(value) : LinearToSRGB(value);
1364 			value *= Float4(scale.x, scale.y, scale.z, scale.w); // Apply scale
1365 		}
1366 		else if(unscale != scale)
1367 		{
1368 			value *= Float4(scale.x / unscale.x, scale.y / unscale.y, scale.z / unscale.z, scale.w / unscale.w);
1369 		}
1370 
1371 		if(Surface::isFloatFormat(state.sourceFormat) && !Surface::isFloatFormat(state.destFormat))
1372 		{
1373 			value = Min(value, Float4(scale.x, scale.y, scale.z, scale.w));
1374 
1375 			value = Max(value, Float4(Surface::isUnsignedComponent(state.destFormat, 0) ? 0.0f : -scale.x,
1376 			                          Surface::isUnsignedComponent(state.destFormat, 1) ? 0.0f : -scale.y,
1377 			                          Surface::isUnsignedComponent(state.destFormat, 2) ? 0.0f : -scale.z,
1378 			                          Surface::isUnsignedComponent(state.destFormat, 3) ? 0.0f : -scale.w));
1379 		}
1380 
1381 		return true;
1382 	}
1383 
ComputeOffset(Int & x,Int & y,Int & pitchB,int bytes,bool quadLayout)1384 	Int Blitter::ComputeOffset(Int &x, Int &y, Int &pitchB, int bytes, bool quadLayout)
1385 	{
1386 		if(!quadLayout)
1387 		{
1388 			return y * pitchB + x * bytes;
1389 		}
1390 		else
1391 		{
1392 			// (x & ~1) * 2 + (x & 1) == (x - (x & 1)) * 2 + (x & 1) == x * 2 - (x & 1) * 2 + (x & 1) == x * 2 - (x & 1)
1393 			return (y & Int(~1)) * pitchB +
1394 			       ((y & Int(1)) * 2 + x * 2 - (x & Int(1))) * bytes;
1395 		}
1396 	}
1397 
LinearToSRGB(Float4 & c)1398 	Float4 Blitter::LinearToSRGB(Float4 &c)
1399 	{
1400 		Float4 lc = Min(c, Float4(0.0031308f)) * Float4(12.92f);
1401 		Float4 ec = Float4(1.055f) * power(c, Float4(1.0f / 2.4f)) - Float4(0.055f);
1402 
1403 		Float4 s = c;
1404 		s.xyz = Max(lc, ec);
1405 
1406 		return s;
1407 	}
1408 
sRGBtoLinear(Float4 & c)1409 	Float4 Blitter::sRGBtoLinear(Float4 &c)
1410 	{
1411 		Float4 lc = c * Float4(1.0f / 12.92f);
1412 		Float4 ec = power((c + Float4(0.055f)) * Float4(1.0f / 1.055f), Float4(2.4f));
1413 
1414 		Int4 linear = CmpLT(c, Float4(0.04045f));
1415 
1416 		Float4 s = c;
1417 		s.xyz = As<Float4>((linear & As<Int4>(lc)) | (~linear & As<Int4>(ec)));   // FIXME: IfThenElse()
1418 
1419 		return s;
1420 	}
1421 
generate(const State & state)1422 	Routine *Blitter::generate(const State &state)
1423 	{
1424 		Function<Void(Pointer<Byte>)> function;
1425 		{
1426 			Pointer<Byte> blit(function.Arg<0>());
1427 
1428 			Pointer<Byte> source = *Pointer<Pointer<Byte>>(blit + OFFSET(BlitData,source));
1429 			Pointer<Byte> dest = *Pointer<Pointer<Byte>>(blit + OFFSET(BlitData,dest));
1430 			Int sPitchB = *Pointer<Int>(blit + OFFSET(BlitData,sPitchB));
1431 			Int dPitchB = *Pointer<Int>(blit + OFFSET(BlitData,dPitchB));
1432 
1433 			Float x0 = *Pointer<Float>(blit + OFFSET(BlitData,x0));
1434 			Float y0 = *Pointer<Float>(blit + OFFSET(BlitData,y0));
1435 			Float w = *Pointer<Float>(blit + OFFSET(BlitData,w));
1436 			Float h = *Pointer<Float>(blit + OFFSET(BlitData,h));
1437 
1438 			Int x0d = *Pointer<Int>(blit + OFFSET(BlitData,x0d));
1439 			Int x1d = *Pointer<Int>(blit + OFFSET(BlitData,x1d));
1440 			Int y0d = *Pointer<Int>(blit + OFFSET(BlitData,y0d));
1441 			Int y1d = *Pointer<Int>(blit + OFFSET(BlitData,y1d));
1442 
1443 			Int sWidth = *Pointer<Int>(blit + OFFSET(BlitData,sWidth));
1444 			Int sHeight = *Pointer<Int>(blit + OFFSET(BlitData,sHeight));
1445 
1446 			bool intSrc = Surface::isNonNormalizedInteger(state.sourceFormat);
1447 			bool intDst = Surface::isNonNormalizedInteger(state.destFormat);
1448 			bool intBoth = intSrc && intDst;
1449 			bool srcQuadLayout = Surface::hasQuadLayout(state.sourceFormat);
1450 			bool dstQuadLayout = Surface::hasQuadLayout(state.destFormat);
1451 			int srcBytes = Surface::bytes(state.sourceFormat);
1452 			int dstBytes = Surface::bytes(state.destFormat);
1453 
1454 			bool hasConstantColorI = false;
1455 			Int4 constantColorI;
1456 			bool hasConstantColorF = false;
1457 			Float4 constantColorF;
1458 			if(state.clearOperation)
1459 			{
1460 				if(intBoth) // Integer types
1461 				{
1462 					if(!read(constantColorI, source, state))
1463 					{
1464 						return nullptr;
1465 					}
1466 					hasConstantColorI = true;
1467 				}
1468 				else
1469 				{
1470 					if(!read(constantColorF, source, state))
1471 					{
1472 						return nullptr;
1473 					}
1474 					hasConstantColorF = true;
1475 
1476 					if(!ApplyScaleAndClamp(constantColorF, state))
1477 					{
1478 						return nullptr;
1479 					}
1480 				}
1481 			}
1482 
1483 			For(Int j = y0d, j < y1d, j++)
1484 			{
1485 				Float y = state.clearOperation ? RValue<Float>(y0) : y0 + Float(j) * h;
1486 				Pointer<Byte> destLine = dest + (dstQuadLayout ? j & Int(~1) : RValue<Int>(j)) * dPitchB;
1487 
1488 				For(Int i = x0d, i < x1d, i++)
1489 				{
1490 					Float x = state.clearOperation ? RValue<Float>(x0) : x0 + Float(i) * w;
1491 					Pointer<Byte> d = destLine + (dstQuadLayout ? (((j & Int(1)) << 1) + (i * 2) - (i & Int(1))) : RValue<Int>(i)) * dstBytes;
1492 
1493 					if(hasConstantColorI)
1494 					{
1495 						if(!write(constantColorI, d, state))
1496 						{
1497 							return nullptr;
1498 						}
1499 					}
1500 					else if(hasConstantColorF)
1501 					{
1502 						for(int s = 0; s < state.destSamples; s++)
1503 						{
1504 							if(!write(constantColorF, d, state))
1505 							{
1506 								return nullptr;
1507 							}
1508 
1509 							d += *Pointer<Int>(blit + OFFSET(BlitData, dSliceB));
1510 						}
1511 					}
1512 					else if(intBoth) // Integer types do not support filtering
1513 					{
1514 						Int4 color; // When both formats are true integer types, we don't go to float to avoid losing precision
1515 						Int X = Int(x);
1516 						Int Y = Int(y);
1517 
1518 						if(state.clampToEdge)
1519 						{
1520 							X = Clamp(X, 0, sWidth - 1);
1521 							Y = Clamp(Y, 0, sHeight - 1);
1522 						}
1523 
1524 						Pointer<Byte> s = source + ComputeOffset(X, Y, sPitchB, srcBytes, srcQuadLayout);
1525 
1526 						if(!read(color, s, state))
1527 						{
1528 							return nullptr;
1529 						}
1530 
1531 						if(!write(color, d, state))
1532 						{
1533 							return nullptr;
1534 						}
1535 					}
1536 					else
1537 					{
1538 						Float4 color;
1539 
1540 						bool preScaled = false;
1541 						if(!state.filter || intSrc)
1542 						{
1543 							Int X = Int(x);
1544 							Int Y = Int(y);
1545 
1546 							if(state.clampToEdge)
1547 							{
1548 								X = Clamp(X, 0, sWidth - 1);
1549 								Y = Clamp(Y, 0, sHeight - 1);
1550 							}
1551 
1552 							Pointer<Byte> s = source + ComputeOffset(X, Y, sPitchB, srcBytes, srcQuadLayout);
1553 
1554 							if(!read(color, s, state))
1555 							{
1556 								return nullptr;
1557 							}
1558 						}
1559 						else   // Bilinear filtering
1560 						{
1561 							Float X = x;
1562 							Float Y = y;
1563 
1564 							if(state.clampToEdge)
1565 							{
1566 								X = Min(Max(x, 0.5f), Float(sWidth) - 0.5f);
1567 								Y = Min(Max(y, 0.5f), Float(sHeight) - 0.5f);
1568 							}
1569 
1570 							Float x0 = X - 0.5f;
1571 							Float y0 = Y - 0.5f;
1572 
1573 							Int X0 = Max(Int(x0), 0);
1574 							Int Y0 = Max(Int(y0), 0);
1575 
1576 							Int X1 = X0 + 1;
1577 							Int Y1 = Y0 + 1;
1578 							X1 = IfThenElse(X1 >= sWidth, X0, X1);
1579 							Y1 = IfThenElse(Y1 >= sHeight, Y0, Y1);
1580 
1581 							Pointer<Byte> s00 = source + ComputeOffset(X0, Y0, sPitchB, srcBytes, srcQuadLayout);
1582 							Pointer<Byte> s01 = source + ComputeOffset(X1, Y0, sPitchB, srcBytes, srcQuadLayout);
1583 							Pointer<Byte> s10 = source + ComputeOffset(X0, Y1, sPitchB, srcBytes, srcQuadLayout);
1584 							Pointer<Byte> s11 = source + ComputeOffset(X1, Y1, sPitchB, srcBytes, srcQuadLayout);
1585 
1586 							Float4 c00; if(!read(c00, s00, state)) return nullptr;
1587 							Float4 c01; if(!read(c01, s01, state)) return nullptr;
1588 							Float4 c10; if(!read(c10, s10, state)) return nullptr;
1589 							Float4 c11; if(!read(c11, s11, state)) return nullptr;
1590 
1591 							if(state.convertSRGB && Surface::isSRGBformat(state.sourceFormat)) // sRGB -> RGB
1592 							{
1593 								if(!ApplyScaleAndClamp(c00, state)) return nullptr;
1594 								if(!ApplyScaleAndClamp(c01, state)) return nullptr;
1595 								if(!ApplyScaleAndClamp(c10, state)) return nullptr;
1596 								if(!ApplyScaleAndClamp(c11, state)) return nullptr;
1597 								preScaled = true;
1598 							}
1599 
1600 							Float4 fx = Float4(x0 - Float(X0));
1601 							Float4 fy = Float4(y0 - Float(Y0));
1602 							Float4 ix = Float4(1.0f) - fx;
1603 							Float4 iy = Float4(1.0f) - fy;
1604 
1605 							color = (c00 * ix + c01 * fx) * iy +
1606 							        (c10 * ix + c11 * fx) * fy;
1607 						}
1608 
1609 						if(!ApplyScaleAndClamp(color, state, preScaled))
1610 						{
1611 							return nullptr;
1612 						}
1613 
1614 						for(int s = 0; s < state.destSamples; s++)
1615 						{
1616 							if(!write(color, d, state))
1617 							{
1618 								return nullptr;
1619 							}
1620 
1621 							d += *Pointer<Int>(blit + OFFSET(BlitData,dSliceB));
1622 						}
1623 					}
1624 				}
1625 			}
1626 		}
1627 
1628 		return function("BlitRoutine");
1629 	}
1630 
blitReactor(Surface * source,const SliceRectF & sourceRect,Surface * dest,const SliceRect & destRect,const Blitter::Options & options)1631 	bool Blitter::blitReactor(Surface *source, const SliceRectF &sourceRect, Surface *dest, const SliceRect &destRect, const Blitter::Options &options)
1632 	{
1633 		ASSERT(!options.clearOperation || ((source->getWidth() == 1) && (source->getHeight() == 1) && (source->getDepth() == 1)));
1634 
1635 		Rect dRect = destRect;
1636 		RectF sRect = sourceRect;
1637 		if(destRect.x0 > destRect.x1)
1638 		{
1639 			swap(dRect.x0, dRect.x1);
1640 			swap(sRect.x0, sRect.x1);
1641 		}
1642 		if(destRect.y0 > destRect.y1)
1643 		{
1644 			swap(dRect.y0, dRect.y1);
1645 			swap(sRect.y0, sRect.y1);
1646 		}
1647 
1648 		State state(options);
1649 		state.clampToEdge = (sourceRect.x0 < 0.0f) ||
1650 		                    (sourceRect.y0 < 0.0f) ||
1651 		                    (sourceRect.x1 > (float)source->getWidth()) ||
1652 		                    (sourceRect.y1 > (float)source->getHeight());
1653 
1654 		bool useSourceInternal = !source->isExternalDirty();
1655 		bool useDestInternal = !dest->isExternalDirty();
1656 		bool isStencil = options.useStencil;
1657 
1658 		state.sourceFormat = isStencil ? source->getStencilFormat() : source->getFormat(useSourceInternal);
1659 		state.destFormat = isStencil ? dest->getStencilFormat() : dest->getFormat(useDestInternal);
1660 		state.destSamples = dest->getSamples();
1661 
1662 		criticalSection.lock();
1663 		Routine *blitRoutine = blitCache->query(state);
1664 
1665 		if(!blitRoutine)
1666 		{
1667 			blitRoutine = generate(state);
1668 
1669 			if(!blitRoutine)
1670 			{
1671 				criticalSection.unlock();
1672 				return false;
1673 			}
1674 
1675 			blitCache->add(state, blitRoutine);
1676 		}
1677 
1678 		criticalSection.unlock();
1679 
1680 		void (*blitFunction)(const BlitData *data) = (void(*)(const BlitData*))blitRoutine->getEntry();
1681 
1682 		BlitData data;
1683 
1684 		bool isRGBA = options.writeMask == 0xF;
1685 		bool isEntireDest = dest->isEntire(destRect);
1686 
1687 		data.source = isStencil ? source->lockStencil(0, 0, 0, sw::PUBLIC) :
1688 		                          source->lock(0, 0, sourceRect.slice, sw::LOCK_READONLY, sw::PUBLIC, useSourceInternal);
1689 		data.dest = isStencil ? dest->lockStencil(0, 0, 0, sw::PUBLIC) :
1690 		                        dest->lock(0, 0, destRect.slice, isRGBA ? (isEntireDest ? sw::LOCK_DISCARD : sw::LOCK_WRITEONLY) : sw::LOCK_READWRITE, sw::PUBLIC, useDestInternal);
1691 		data.sPitchB = isStencil ? source->getStencilPitchB() : source->getPitchB(useSourceInternal);
1692 		data.dPitchB = isStencil ? dest->getStencilPitchB() : dest->getPitchB(useDestInternal);
1693 		data.dSliceB = isStencil ? dest->getStencilSliceB() : dest->getSliceB(useDestInternal);
1694 
1695 		data.w = sRect.width() / dRect.width();
1696 		data.h = sRect.height() / dRect.height();
1697 		data.x0 = sRect.x0 + (0.5f - dRect.x0) * data.w;
1698 		data.y0 = sRect.y0 + (0.5f - dRect.y0) * data.h;
1699 
1700 		data.x0d = dRect.x0;
1701 		data.x1d = dRect.x1;
1702 		data.y0d = dRect.y0;
1703 		data.y1d = dRect.y1;
1704 
1705 		data.sWidth = source->getWidth();
1706 		data.sHeight = source->getHeight();
1707 
1708 		blitFunction(&data);
1709 
1710 		if(isStencil)
1711 		{
1712 			source->unlockStencil();
1713 			dest->unlockStencil();
1714 		}
1715 		else
1716 		{
1717 			source->unlock(useSourceInternal);
1718 			dest->unlock(useDestInternal);
1719 		}
1720 
1721 		return true;
1722 	}
1723 }
1724