1 /****************************************************************************
2 * Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * @file Convert.h
24 *
25 * @brief Conversion utility functions
26 *
27 ******************************************************************************/
28 #pragma once
29 
30 #if defined(_WIN32)
31 // disable "potential divide by 0"
32 #pragma warning(disable: 4723)
33 #endif
34 
#include <cmath>
#include <cstring>
36 
37 //////////////////////////////////////////////////////////////////////////
38 /// @brief Convert an IEEE 754 16-bit float to an 32-bit single precision
39 ///        float
40 /// @param val - 16-bit float
41 /// @todo Maybe move this outside of this file into a header?
ConvertSmallFloatTo32(UINT val)42 static INLINE float ConvertSmallFloatTo32(UINT val)
43 {
44     UINT result;
45     if ((val & 0x7fff) == 0)
46     {
47         result = ((uint32_t)(val & 0x8000)) << 16;
48     }
49     else if ((val & 0x7c00) == 0x7c00)
50     {
51         result = ((val & 0x3ff) == 0) ? 0x7f800000 : 0x7fc00000;
52         result |= ((uint32_t)val & 0x8000) << 16;
53     }
54     else
55     {
56         uint32_t sign = (val & 0x8000) << 16;
57         uint32_t mant = (val & 0x3ff) << 13;
58         uint32_t exp = (val >> 10) & 0x1f;
59         if ((exp == 0) && (mant != 0)) // Adjust exponent and mantissa for denormals
60         {
61             mant <<= 1;
62             while (mant < (0x400 << 13))
63             {
64                 exp--;
65                 mant <<= 1;
66             }
67             mant &= (0x3ff << 13);
68         }
69         exp = ((exp - 15 + 127) & 0xff) << 23;
70         result = sign | exp | mant;
71     }
72 
73     return *(float*)&result;
74 }
75 
76 //////////////////////////////////////////////////////////////////////////
77 /// @brief Convert an IEEE 754 32-bit single precision float to an
78 ///        unsigned small float with 5 exponent bits and a variable
79 ///        number of mantissa bits.
80 /// @param val - 32-bit float
81 /// @todo Maybe move this outside of this file into a header?
82 template<UINT numMantissaBits>
Convert32ToSmallFloat(float val)83 static UINT Convert32ToSmallFloat(float val)
84 {
85     uint32_t sign, exp, mant;
86     uint32_t roundBits;
87 
88     // Extract the sign, exponent, and mantissa
89     UINT uf = *(UINT*)&val;
90 
91     sign = (uf & 0x80000000) >> 31;
92     exp = (uf & 0x7F800000) >> 23;
93     mant = uf & 0x007FFFFF;
94 
95     // 10/11 bit floats are unsigned.  Negative values are clamped to 0.
96     if (sign != 0)
97     {
98         exp = mant = 0;
99     }
100     // Check for out of range
101     else if ((exp == 0xFF) && (mant != 0)) // NaN
102     {
103         exp = 0x1F;
104         mant = 1 << numMantissaBits;
105     }
106     else if ((exp == 0xFF) && (mant == 0)) // INF
107     {
108         exp = 0x1F;
109         mant = 0;
110     }
111     else if (exp > (0x70 + 0x1E)) // Too big to represent
112     {
113         exp = 0x1Eu;
114         mant = (1 << numMantissaBits) - 1;  // 0x3F for 6 bit mantissa.
115     }
116     else if ((exp <= 0x70) && (exp >= 0x66)) // It's a denorm
117     {
118         mant |= 0x00800000;
119         for (; exp <= 0x70; mant >>= 1, exp++)
120             ;
121         exp = 0;
122         mant = mant >> (23 - numMantissaBits);
123     }
124     else if (exp < 0x66) // Too small to represent -> Zero
125     {
126         exp = 0;
127         mant = 0;
128     }
129     else
130     {
131         // Saves bits that will be shifted off for rounding
132         roundBits = mant & 0x1FFFu;
133         // convert exponent and mantissa to 16 bit format
134         exp = exp - 0x70u;
135         mant = mant >> (23 - numMantissaBits);
136 
137         // Essentially RTZ, but round up if off by only 1 lsb
138         if (roundBits == 0x1FFFu)
139         {
140             mant++;
141             // check for overflow
142             if ((mant & (0x3 << numMantissaBits)) != 0) // 0x60 = 0x3 << (num Mantissa Bits)
143                 exp++;
144             // make sure only the needed bits are used
145             mant &= (1 << numMantissaBits) - 1;
146         }
147     }
148 
149     UINT tmpVal = (exp << numMantissaBits) | mant;
150     return tmpVal;
151 }
152 
153 #if KNOB_ARCH == KNOB_ARCH_AVX
//////////////////////////////////////////////////////////////////////////
/// @brief Convert an IEEE 754 32-bit single precision float to a
///        16 bit float with 1 sign bit, 5 exponent bits and 10 mantissa
///        bits.
/// @param val - 32-bit float
/// @return 16-bit float bit pattern. NaN yields 0xFE00 (sign bit forced on),
///         infinities keep their sign, finite values beyond the half range
///         are clamped to the largest finite magnitude, and values too small
///         for a half denormal become signed zero.
/// @todo Maybe move this outside of this file into a header?
static uint16_t Convert32To16Float(float val)
{
    // Extract the sign, exponent, and mantissa
    uint32_t uf;
    std::memcpy(&uf, &val, sizeof(uf));

    uint32_t sign = (uf & 0x80000000) >> 31;
    uint32_t exp  = (uf & 0x7F800000) >> 23;
    uint32_t mant = uf & 0x007FFFFF;

    // Check for out of range
    if (std::isnan(val))
    {
        exp  = 0x1F;
        mant = 0x200;
        sign = 1;                     // set the sign bit for NANs
    }
    else if (std::isinf(val))
    {
        exp  = 0x1f;
        mant = 0x0;
    }
    else if (exp > (0x70 + 0x1E)) // Too big to represent -> max representable value
    {
        exp  = 0x1E;
        mant = 0x3FF;
    }
    else if ((exp <= 0x70) && (exp >= 0x66)) // It's a denorm
    {
        // Make the implicit leading one explicit, then shift right once per
        // exponent step needed to reach the smallest normal half exponent.
        mant |= 0x00800000;
        for (; exp <= 0x70; mant >>= 1, exp++)
            ;
        exp  = 0;
        mant = mant >> 13;
    }
    else if (exp < 0x66) // Too small to represent -> Zero
    {
        exp  = 0;
        mant = 0;
    }
    else
    {
        // Saves bits that will be shifted off for rounding
        const uint32_t roundBits = mant & 0x1FFFu;
        // convert exponent and mantissa to 16 bit format
        exp  = exp - 0x70;
        mant = mant >> 13;

        // Essentially RTZ, but round up if off by only 1 lsb
        if (roundBits == 0x1FFFu)
        {
            mant++;
            // check for overflow
            if ((mant & 0xC00u) != 0)
                exp++;
            // make sure only the needed bits are used
            mant &= 0x3FF;

            // Rounding must not turn a finite value into INF; clamp to the
            // largest finite value like the "too big" case above.
            if (exp > 0x1E)
            {
                exp  = 0x1E;
                mant = 0x3FF;
            }
        }
    }

    uint32_t tmpVal = (sign << 15) | (exp << 10) | mant;
    return (uint16_t)tmpVal;
}
224 #endif
225 
226 //////////////////////////////////////////////////////////////////////////
227 /// @brief Retrieve color from hot tile source which is always float.
228 /// @param pDstPixel - Pointer to destination pixel.
229 /// @param srcPixel - Pointer to source pixel (pre-swizzled according to dest).
230 template<SWR_FORMAT DstFormat>
ConvertPixelFromFloat(uint8_t * pDstPixel,const float srcPixel[4])231 static void ConvertPixelFromFloat(
232     uint8_t* pDstPixel,
233     const float srcPixel[4])
234 {
235     uint32_t outColor[4] = { 0 };  // typeless bits
236 
237     // Store component
238     for (UINT comp = 0; comp < FormatTraits<DstFormat>::numComps; ++comp)
239     {
240         SWR_TYPE type = FormatTraits<DstFormat>::GetType(comp);
241 
242         float src = srcPixel[comp];
243 
244         switch (type)
245         {
246         case SWR_TYPE_UNORM:
247         {
248             // Force NaN to 0. IEEE standard, comparisons involving NaN always evaluate to false.
249             src = (src != src) ? 0.0f : src;
250 
251             // Clamp [0, 1]
252             src = std::max(src, 0.0f);
253             src = std::min(src, 1.0f);
254 
255             // SRGB
256             if (FormatTraits<DstFormat>::isSRGB && comp != 3)
257             {
258                 src = (src <= 0.0031308f) ? (12.92f * src) : (1.055f * powf(src, (1.0f / 2.4f)) - 0.055f);
259             }
260 
261             // Float scale to integer scale.
262             UINT scale = (1 << FormatTraits<DstFormat>::GetBPC(comp)) - 1;
263             src = (float)scale * src;
264             src = roundf(src);
265             outColor[comp] = (UINT)src; // Drop fractional part.
266             break;
267         }
268         case SWR_TYPE_SNORM:
269         {
270             SWR_ASSERT(!FormatTraits<DstFormat>::isSRGB);
271 
272             // Force NaN to 0. IEEE standard, comparisons involving NaN always evaluate to false.
273             src = (src != src) ? 0.0f : src;
274 
275             // Clamp [-1, 1]
276             src = std::max(src, -1.0f);
277             src = std::min(src, 1.0f);
278 
279             // Float scale to integer scale.
280             UINT scale = (1 << (FormatTraits<DstFormat>::GetBPC(comp) - 1)) - 1;
281             src = (float)scale * src;
282 
283             // Round
284             src += (src >= 0) ? 0.5f : -0.5f;
285 
286             INT out = (INT)src;
287 
288             outColor[comp] = *(UINT*)&out;
289 
290             break;
291         }
292         case SWR_TYPE_UINT:
293         {
294             ///@note The *(UINT*)& is currently necessary as the hot tile appears to always be float.
295             //       However, the number in the hot tile should be unsigned integer. So doing this
296             //       to preserve bits intead of doing a float -> integer conversion.
297             if (FormatTraits<DstFormat>::GetBPC(comp) == 32)
298             {
299                 outColor[comp] = *(UINT*)&src;
300             }
301             else
302             {
303                 outColor[comp] = *(UINT*)&src;
304                 UINT max = (1 << FormatTraits<DstFormat>::GetBPC(comp)) - 1;  // 2^numBits - 1
305 
306                 outColor[comp] = std::min(max, outColor[comp]);
307             }
308             break;
309         }
310         case SWR_TYPE_SINT:
311         {
312             if (FormatTraits<DstFormat>::GetBPC(comp) == 32)
313             {
314                 outColor[comp] = *(UINT*)&src;
315             }
316             else
317             {
318                 INT out = *(INT*)&src;  // Hot tile format is SINT?
319                 INT max = (1 << (FormatTraits<DstFormat>::GetBPC(comp) - 1)) - 1;
320                 INT min = -1 - max;
321 
322                 ///@note The output is unsigned integer (bag of bits) and so performing
323                 //       the clamping here based on range of output component. Also, manually adding
324                 //       the sign bit in the appropriate spot. Maybe a better way?
325                 out = std::max(out, min);
326                 out = std::min(out, max);
327 
328                 outColor[comp] = *(UINT*)&out;
329             }
330             break;
331         }
332         case SWR_TYPE_FLOAT:
333         {
334             if (FormatTraits<DstFormat>::GetBPC(comp) == 16)
335             {
336                 // Convert from 32-bit float to 16-bit float using _mm_cvtps_ph
337                 // @todo 16bit float instruction support is orthogonal to avx support.  need to
338                 // add check for F16C support instead.
339 #if KNOB_ARCH >= KNOB_ARCH_AVX2
340                 __m128 src128 = _mm_set1_ps(src);
341                 __m128i srci128 = _mm_cvtps_ph(src128, _MM_FROUND_TRUNC);
342                 UINT value = _mm_extract_epi16(srci128, 0);
343 #else
344                 UINT value = Convert32To16Float(src);
345 #endif
346 
347                 outColor[comp] = value;
348             }
349             else if (FormatTraits<DstFormat>::GetBPC(comp) == 11)
350             {
351                 outColor[comp] = Convert32ToSmallFloat<6>(src);
352             }
353             else if (FormatTraits<DstFormat>::GetBPC(comp) == 10)
354             {
355                 outColor[comp] = Convert32ToSmallFloat<5>(src);
356             }
357             else
358             {
359                 outColor[comp] = *(UINT*)&src;
360             }
361 
362             break;
363         }
364         default:
365             SWR_INVALID("Invalid type: %d", type);
366             break;
367         }
368     }
369 
370     typename FormatTraits<DstFormat>::FormatT* pPixel = (typename FormatTraits<DstFormat>::FormatT*)pDstPixel;
371 
372     switch (FormatTraits<DstFormat>::numComps)
373     {
374     case 4:
375         pPixel->a = outColor[3];
376     case 3:
377         pPixel->b = outColor[2];
378     case 2:
379         pPixel->g = outColor[1];
380     case 1:
381         pPixel->r = outColor[0];
382         break;
383     default:
384         SWR_INVALID("Invalid # of comps: %d", FormatTraits<DstFormat>::numComps);
385     }
386 }
387 
388 //////////////////////////////////////////////////////////////////////////
389 /// @brief Convert pixel in any format to float32
390 /// @param pDstPixel - Pointer to destination pixel.
391 /// @param srcPixel - Pointer to source pixel
392 template<SWR_FORMAT SrcFormat>
ConvertPixelToFloat(float dstPixel[4],const uint8_t * pSrc)393 INLINE static void ConvertPixelToFloat(
394     float dstPixel[4],
395     const uint8_t* pSrc)
396 {
397     uint32_t srcColor[4];  // typeless bits
398 
399     // unpack src pixel
400     typename FormatTraits<SrcFormat>::FormatT* pPixel = (typename FormatTraits<SrcFormat>::FormatT*)pSrc;
401 
402     // apply format defaults
403     for (uint32_t comp = 0; comp < 4; ++comp)
404     {
405         uint32_t def = FormatTraits<SrcFormat>::GetDefault(comp);
406         dstPixel[comp] = *(float*)&def;
407     }
408 
409     // load format data
410     switch (FormatTraits<SrcFormat>::numComps)
411     {
412     case 4:
413         srcColor[3] = pPixel->a;
414     case 3:
415         srcColor[2] = pPixel->b;
416     case 2:
417         srcColor[1] = pPixel->g;
418     case 1:
419         srcColor[0] = pPixel->r;
420         break;
421     default:
422         SWR_INVALID("Invalid # of comps: %d", FormatTraits<SrcFormat>::numComps);
423     }
424 
425     // Convert components
426     for (uint32_t comp = 0; comp < FormatTraits<SrcFormat>::numComps; ++comp)
427     {
428         SWR_TYPE type = FormatTraits<SrcFormat>::GetType(comp);
429 
430         uint32_t src = srcColor[comp];
431 
432         switch (type)
433         {
434         case SWR_TYPE_UNORM:
435         {
436             float dst;
437             if (FormatTraits<SrcFormat>::isSRGB && comp != 3)
438             {
439                 dst = *(float*)&srgb8Table[src];
440             }
441             else
442             {
443                 // component sizes > 16 must use fp divide to maintain ulp requirements
444                 if (FormatTraits<SrcFormat>::GetBPC(comp) > 16)
445                 {
446                     dst = (float)src / (float)((1 << FormatTraits<SrcFormat>::GetBPC(comp)) - 1);
447                 }
448                 else
449                 {
450                     const float scale = (1.0f / (float)((1 << FormatTraits<SrcFormat>::GetBPC(comp)) - 1));
451                     dst = (float)src * scale;
452                 }
453             }
454             dstPixel[FormatTraits<SrcFormat>::swizzle(comp)] = dst;
455             break;
456         }
457         case SWR_TYPE_SNORM:
458         {
459             SWR_ASSERT(!FormatTraits<SrcFormat>::isSRGB);
460 
461             float dst;
462             if (src == 0x10)
463             {
464                 dst = -1.0f;
465             }
466             else
467             {
468                 switch (FormatTraits<SrcFormat>::GetBPC(comp))
469                 {
470                 case 8:
471                     dst = (float)((int8_t)src);
472                     break;
473                 case 16:
474                     dst = (float)((int16_t)src);
475                     break;
476                 case 32:
477                     dst = (float)((int32_t)src);
478                     break;
479                 default:
480                     assert(0 && "attempted to load from SNORM with unsupported bpc");
481                     dst = 0.0f;
482                     break;
483                 }
484                 dst = dst * (1.0f / ((1 << (FormatTraits<SrcFormat>::GetBPC(comp) - 1)) - 1));
485             }
486             dstPixel[FormatTraits<SrcFormat>::swizzle(comp)] = dst;
487             break;
488         }
489         case SWR_TYPE_UINT:
490         {
491             uint32_t dst = (uint32_t)src;
492             dstPixel[FormatTraits<SrcFormat>::swizzle(comp)] = *(float*)&dst;
493             break;
494         }
495         case SWR_TYPE_SINT:
496         {
497             int dst;
498             switch (FormatTraits<SrcFormat>::GetBPC(comp))
499             {
500             case 8:
501                 dst = (int8_t)src;
502                 break;
503             case 16:
504                 dst = (int16_t)src;
505                 break;
506             case 32:
507                 dst = (int32_t)src;
508                 break;
509             default:
510                 assert(0 && "attempted to load from SINT with unsupported bpc");
511                 dst = 0;
512                 break;
513             }
514             dstPixel[FormatTraits<SrcFormat>::swizzle(comp)] = *(float*)&dst;
515             break;
516         }
517         case SWR_TYPE_FLOAT:
518         {
519             float dst;
520             if (FormatTraits<SrcFormat>::GetBPC(comp) == 16)
521             {
522 #if KNOB_ARCH >= KNOB_ARCH_AVX2
523                 // Convert from 16-bit float to 32-bit float using _mm_cvtph_ps
524                 // @todo 16bit float instruction support is orthogonal to avx support.  need to
525                 // add check for F16C support instead.
526                 __m128i src128 = _mm_set1_epi32(src);
527                 __m128 res = _mm_cvtph_ps(src128);
528                 _mm_store_ss(&dst, res);
529 #else
530                 dst = ConvertSmallFloatTo32(src);
531 #endif
532             }
533             else if (FormatTraits<SrcFormat>::GetBPC(comp) == 11)
534             {
535                 dst = ConvertSmallFloatTo32(src << 4);
536             }
537             else if (FormatTraits<SrcFormat>::GetBPC(comp) == 10)
538             {
539                 dst = ConvertSmallFloatTo32(src << 5);
540             }
541             else
542             {
543                 dst = *(float*)&src;
544             }
545 
546             dstPixel[FormatTraits<SrcFormat>::swizzle(comp)] = *(float*)&dst;
547             break;
548         }
549         default:
550             SWR_INVALID("Invalid type: %d", type);
551             break;
552         }
553     }
554 }
555 
556 // non-templated version of conversion functions
ConvertPixelFromFloat(SWR_FORMAT format,uint8_t * pDst,const float srcPixel[4])557 INLINE static void ConvertPixelFromFloat(
558     SWR_FORMAT format,
559     uint8_t* pDst,
560     const float srcPixel[4])
561 {
562     switch (format)
563     {
564     case R32G32B32A32_FLOAT: ConvertPixelFromFloat<R32G32B32A32_FLOAT>(pDst, srcPixel); break;
565     case R32G32B32A32_SINT: ConvertPixelFromFloat<R32G32B32A32_SINT>(pDst, srcPixel); break;
566     case R32G32B32A32_UINT: ConvertPixelFromFloat<R32G32B32A32_UINT>(pDst, srcPixel); break;
567     case R32G32B32X32_FLOAT: ConvertPixelFromFloat<R32G32B32X32_FLOAT>(pDst, srcPixel); break;
568     case R32G32B32A32_SSCALED: ConvertPixelFromFloat<R32G32B32A32_SSCALED>(pDst, srcPixel); break;
569     case R32G32B32A32_USCALED: ConvertPixelFromFloat<R32G32B32A32_USCALED>(pDst, srcPixel); break;
570     case R32G32B32_FLOAT: ConvertPixelFromFloat<R32G32B32_FLOAT>(pDst, srcPixel); break;
571     case R32G32B32_SINT: ConvertPixelFromFloat<R32G32B32_SINT>(pDst, srcPixel); break;
572     case R32G32B32_UINT: ConvertPixelFromFloat<R32G32B32_UINT>(pDst, srcPixel); break;
573     case R32G32B32_SSCALED: ConvertPixelFromFloat<R32G32B32_SSCALED>(pDst, srcPixel); break;
574     case R32G32B32_USCALED: ConvertPixelFromFloat<R32G32B32_USCALED>(pDst, srcPixel); break;
575     case R16G16B16A16_UNORM: ConvertPixelFromFloat<R16G16B16A16_UNORM>(pDst, srcPixel); break;
576     case R16G16B16A16_SNORM: ConvertPixelFromFloat<R16G16B16A16_SNORM>(pDst, srcPixel); break;
577     case R16G16B16A16_SINT: ConvertPixelFromFloat<R16G16B16A16_SINT>(pDst, srcPixel); break;
578     case R16G16B16A16_UINT: ConvertPixelFromFloat<R16G16B16A16_UINT>(pDst, srcPixel); break;
579     case R16G16B16A16_FLOAT: ConvertPixelFromFloat<R16G16B16A16_FLOAT>(pDst, srcPixel); break;
580     case R32G32_FLOAT: ConvertPixelFromFloat<R32G32_FLOAT>(pDst, srcPixel); break;
581     case R32G32_SINT: ConvertPixelFromFloat<R32G32_SINT>(pDst, srcPixel); break;
582     case R32G32_UINT: ConvertPixelFromFloat<R32G32_UINT>(pDst, srcPixel); break;
583     case R32_FLOAT_X8X24_TYPELESS: ConvertPixelFromFloat<R32_FLOAT_X8X24_TYPELESS>(pDst, srcPixel); break;
584     case X32_TYPELESS_G8X24_UINT: ConvertPixelFromFloat<X32_TYPELESS_G8X24_UINT>(pDst, srcPixel); break;
585     case L32A32_FLOAT: ConvertPixelFromFloat<L32A32_FLOAT>(pDst, srcPixel); break;
586     case R16G16B16X16_UNORM: ConvertPixelFromFloat<R16G16B16X16_UNORM>(pDst, srcPixel); break;
587     case R16G16B16X16_FLOAT: ConvertPixelFromFloat<R16G16B16X16_FLOAT>(pDst, srcPixel); break;
588     case L32X32_FLOAT: ConvertPixelFromFloat<L32X32_FLOAT>(pDst, srcPixel); break;
589     case I32X32_FLOAT: ConvertPixelFromFloat<I32X32_FLOAT>(pDst, srcPixel); break;
590     case R16G16B16A16_SSCALED: ConvertPixelFromFloat<R16G16B16A16_SSCALED>(pDst, srcPixel); break;
591     case R16G16B16A16_USCALED: ConvertPixelFromFloat<R16G16B16A16_USCALED>(pDst, srcPixel); break;
592     case R32G32_SSCALED: ConvertPixelFromFloat<R32G32_SSCALED>(pDst, srcPixel); break;
593     case R32G32_USCALED: ConvertPixelFromFloat<R32G32_USCALED>(pDst, srcPixel); break;
594     case B8G8R8A8_UNORM: ConvertPixelFromFloat<B8G8R8A8_UNORM>(pDst, srcPixel); break;
595     case B8G8R8A8_UNORM_SRGB: ConvertPixelFromFloat<B8G8R8A8_UNORM_SRGB>(pDst, srcPixel); break;
596     case R10G10B10A2_UNORM: ConvertPixelFromFloat<R10G10B10A2_UNORM>(pDst, srcPixel); break;
597     case R10G10B10A2_UNORM_SRGB: ConvertPixelFromFloat<R10G10B10A2_UNORM_SRGB>(pDst, srcPixel); break;
598     case R10G10B10A2_UINT: ConvertPixelFromFloat<R10G10B10A2_UINT>(pDst, srcPixel); break;
599     case R8G8B8A8_UNORM: ConvertPixelFromFloat<R8G8B8A8_UNORM>(pDst, srcPixel); break;
600     case R8G8B8A8_UNORM_SRGB: ConvertPixelFromFloat<R8G8B8A8_UNORM_SRGB>(pDst, srcPixel); break;
601     case R8G8B8A8_SNORM: ConvertPixelFromFloat<R8G8B8A8_SNORM>(pDst, srcPixel); break;
602     case R8G8B8A8_SINT: ConvertPixelFromFloat<R8G8B8A8_SINT>(pDst, srcPixel); break;
603     case R8G8B8A8_UINT: ConvertPixelFromFloat<R8G8B8A8_UINT>(pDst, srcPixel); break;
604     case R16G16_UNORM: ConvertPixelFromFloat<R16G16_UNORM>(pDst, srcPixel); break;
605     case R16G16_SNORM: ConvertPixelFromFloat<R16G16_SNORM>(pDst, srcPixel); break;
606     case R16G16_SINT: ConvertPixelFromFloat<R16G16_SINT>(pDst, srcPixel); break;
607     case R16G16_UINT: ConvertPixelFromFloat<R16G16_UINT>(pDst, srcPixel); break;
608     case R16G16_FLOAT: ConvertPixelFromFloat<R16G16_FLOAT>(pDst, srcPixel); break;
609     case B10G10R10A2_UNORM: ConvertPixelFromFloat<B10G10R10A2_UNORM>(pDst, srcPixel); break;
610     case B10G10R10A2_UNORM_SRGB: ConvertPixelFromFloat<B10G10R10A2_UNORM_SRGB>(pDst, srcPixel); break;
611     case R11G11B10_FLOAT: ConvertPixelFromFloat<R11G11B10_FLOAT>(pDst, srcPixel); break;
612     case R10G10B10_FLOAT_A2_UNORM: ConvertPixelFromFloat<R10G10B10_FLOAT_A2_UNORM>(pDst, srcPixel); break;
613     case R32_SINT: ConvertPixelFromFloat<R32_SINT>(pDst, srcPixel); break;
614     case R32_UINT: ConvertPixelFromFloat<R32_UINT>(pDst, srcPixel); break;
615     case R32_FLOAT: ConvertPixelFromFloat<R32_FLOAT>(pDst, srcPixel); break;
616     case R24_UNORM_X8_TYPELESS: ConvertPixelFromFloat<R24_UNORM_X8_TYPELESS>(pDst, srcPixel); break;
617     case X24_TYPELESS_G8_UINT: ConvertPixelFromFloat<X24_TYPELESS_G8_UINT>(pDst, srcPixel); break;
618     case L32_UNORM: ConvertPixelFromFloat<L32_UNORM>(pDst, srcPixel); break;
619     case L16A16_UNORM: ConvertPixelFromFloat<L16A16_UNORM>(pDst, srcPixel); break;
620     case I24X8_UNORM: ConvertPixelFromFloat<I24X8_UNORM>(pDst, srcPixel); break;
621     case L24X8_UNORM: ConvertPixelFromFloat<L24X8_UNORM>(pDst, srcPixel); break;
622     case I32_FLOAT: ConvertPixelFromFloat<I32_FLOAT>(pDst, srcPixel); break;
623     case L32_FLOAT: ConvertPixelFromFloat<L32_FLOAT>(pDst, srcPixel); break;
624     case A32_FLOAT: ConvertPixelFromFloat<A32_FLOAT>(pDst, srcPixel); break;
625     case B8G8R8X8_UNORM: ConvertPixelFromFloat<B8G8R8X8_UNORM>(pDst, srcPixel); break;
626     case B8G8R8X8_UNORM_SRGB: ConvertPixelFromFloat<B8G8R8X8_UNORM_SRGB>(pDst, srcPixel); break;
627     case R8G8B8X8_UNORM: ConvertPixelFromFloat<R8G8B8X8_UNORM>(pDst, srcPixel); break;
628     case R8G8B8X8_UNORM_SRGB: ConvertPixelFromFloat<R8G8B8X8_UNORM_SRGB>(pDst, srcPixel); break;
629     case R9G9B9E5_SHAREDEXP: ConvertPixelFromFloat<R9G9B9E5_SHAREDEXP>(pDst, srcPixel); break;
630     case B10G10R10X2_UNORM: ConvertPixelFromFloat<B10G10R10X2_UNORM>(pDst, srcPixel); break;
631     case L16A16_FLOAT: ConvertPixelFromFloat<L16A16_FLOAT>(pDst, srcPixel); break;
632     case R10G10B10X2_USCALED: ConvertPixelFromFloat<R10G10B10X2_USCALED>(pDst, srcPixel); break;
633     case R8G8B8A8_SSCALED: ConvertPixelFromFloat<R8G8B8A8_SSCALED>(pDst, srcPixel); break;
634     case R8G8B8A8_USCALED: ConvertPixelFromFloat<R8G8B8A8_USCALED>(pDst, srcPixel); break;
635     case R16G16_SSCALED: ConvertPixelFromFloat<R16G16_SSCALED>(pDst, srcPixel); break;
636     case R16G16_USCALED: ConvertPixelFromFloat<R16G16_USCALED>(pDst, srcPixel); break;
637     case R32_SSCALED: ConvertPixelFromFloat<R32_SSCALED>(pDst, srcPixel); break;
638     case R32_USCALED: ConvertPixelFromFloat<R32_USCALED>(pDst, srcPixel); break;
639     case B5G6R5_UNORM: ConvertPixelFromFloat<B5G6R5_UNORM>(pDst, srcPixel); break;
640     case B5G6R5_UNORM_SRGB: ConvertPixelFromFloat<B5G6R5_UNORM_SRGB>(pDst, srcPixel); break;
641     case B5G5R5A1_UNORM: ConvertPixelFromFloat<B5G5R5A1_UNORM>(pDst, srcPixel); break;
642     case B5G5R5A1_UNORM_SRGB: ConvertPixelFromFloat<B5G5R5A1_UNORM_SRGB>(pDst, srcPixel); break;
643     case B4G4R4A4_UNORM: ConvertPixelFromFloat<B4G4R4A4_UNORM>(pDst, srcPixel); break;
644     case B4G4R4A4_UNORM_SRGB: ConvertPixelFromFloat<B4G4R4A4_UNORM_SRGB>(pDst, srcPixel); break;
645     case R8G8_UNORM: ConvertPixelFromFloat<R8G8_UNORM>(pDst, srcPixel); break;
646     case R8G8_SNORM: ConvertPixelFromFloat<R8G8_SNORM>(pDst, srcPixel); break;
647     case R8G8_SINT: ConvertPixelFromFloat<R8G8_SINT>(pDst, srcPixel); break;
648     case R8G8_UINT: ConvertPixelFromFloat<R8G8_UINT>(pDst, srcPixel); break;
649     case R16_UNORM: ConvertPixelFromFloat<R16_UNORM>(pDst, srcPixel); break;
650     case R16_SNORM: ConvertPixelFromFloat<R16_SNORM>(pDst, srcPixel); break;
651     case R16_SINT: ConvertPixelFromFloat<R16_SINT>(pDst, srcPixel); break;
652     case R16_UINT: ConvertPixelFromFloat<R16_UINT>(pDst, srcPixel); break;
653     case R16_FLOAT: ConvertPixelFromFloat<R16_FLOAT>(pDst, srcPixel); break;
654     case I16_UNORM: ConvertPixelFromFloat<I16_UNORM>(pDst, srcPixel); break;
655     case L16_UNORM: ConvertPixelFromFloat<L16_UNORM>(pDst, srcPixel); break;
656     case A16_UNORM: ConvertPixelFromFloat<A16_UNORM>(pDst, srcPixel); break;
657     case L8A8_UNORM: ConvertPixelFromFloat<L8A8_UNORM>(pDst, srcPixel); break;
658     case I16_FLOAT: ConvertPixelFromFloat<I16_FLOAT>(pDst, srcPixel); break;
659     case L16_FLOAT: ConvertPixelFromFloat<L16_FLOAT>(pDst, srcPixel); break;
660     case A16_FLOAT: ConvertPixelFromFloat<A16_FLOAT>(pDst, srcPixel); break;
661     case L8A8_UNORM_SRGB: ConvertPixelFromFloat<L8A8_UNORM_SRGB>(pDst, srcPixel); break;
662     case B5G5R5X1_UNORM: ConvertPixelFromFloat<B5G5R5X1_UNORM>(pDst, srcPixel); break;
663     case B5G5R5X1_UNORM_SRGB: ConvertPixelFromFloat<B5G5R5X1_UNORM_SRGB>(pDst, srcPixel); break;
664     case R8G8_SSCALED: ConvertPixelFromFloat<R8G8_SSCALED>(pDst, srcPixel); break;
665     case R8G8_USCALED: ConvertPixelFromFloat<R8G8_USCALED>(pDst, srcPixel); break;
666     case R16_SSCALED: ConvertPixelFromFloat<R16_SSCALED>(pDst, srcPixel); break;
667     case R16_USCALED: ConvertPixelFromFloat<R16_USCALED>(pDst, srcPixel); break;
668     case A1B5G5R5_UNORM: ConvertPixelFromFloat<A1B5G5R5_UNORM>(pDst, srcPixel); break;
669     case A4B4G4R4_UNORM: ConvertPixelFromFloat<A4B4G4R4_UNORM>(pDst, srcPixel); break;
670     case L8A8_UINT: ConvertPixelFromFloat<L8A8_UINT>(pDst, srcPixel); break;
671     case L8A8_SINT: ConvertPixelFromFloat<L8A8_SINT>(pDst, srcPixel); break;
672     case R8_UNORM: ConvertPixelFromFloat<R8_UNORM>(pDst, srcPixel); break;
673     case R8_SNORM: ConvertPixelFromFloat<R8_SNORM>(pDst, srcPixel); break;
674     case R8_SINT: ConvertPixelFromFloat<R8_SINT>(pDst, srcPixel); break;
675     case R8_UINT: ConvertPixelFromFloat<R8_UINT>(pDst, srcPixel); break;
676     case A8_UNORM: ConvertPixelFromFloat<A8_UNORM>(pDst, srcPixel); break;
677     case I8_UNORM: ConvertPixelFromFloat<I8_UNORM>(pDst, srcPixel); break;
678     case L8_UNORM: ConvertPixelFromFloat<L8_UNORM>(pDst, srcPixel); break;
679     case R8_SSCALED: ConvertPixelFromFloat<R8_SSCALED>(pDst, srcPixel); break;
680     case R8_USCALED: ConvertPixelFromFloat<R8_USCALED>(pDst, srcPixel); break;
681     case L8_UNORM_SRGB: ConvertPixelFromFloat<L8_UNORM_SRGB>(pDst, srcPixel); break;
682     case L8_UINT: ConvertPixelFromFloat<L8_UINT>(pDst, srcPixel); break;
683     case L8_SINT: ConvertPixelFromFloat<L8_SINT>(pDst, srcPixel); break;
684     case I8_UINT: ConvertPixelFromFloat<I8_UINT>(pDst, srcPixel); break;
685     case I8_SINT: ConvertPixelFromFloat<I8_SINT>(pDst, srcPixel); break;
686     case YCRCB_SWAPUVY: ConvertPixelFromFloat<YCRCB_SWAPUVY>(pDst, srcPixel); break;
687     case BC1_UNORM: ConvertPixelFromFloat<BC1_UNORM>(pDst, srcPixel); break;
688     case BC2_UNORM: ConvertPixelFromFloat<BC2_UNORM>(pDst, srcPixel); break;
689     case BC3_UNORM: ConvertPixelFromFloat<BC3_UNORM>(pDst, srcPixel); break;
690     case BC4_UNORM: ConvertPixelFromFloat<BC4_UNORM>(pDst, srcPixel); break;
691     case BC5_UNORM: ConvertPixelFromFloat<BC5_UNORM>(pDst, srcPixel); break;
692     case BC1_UNORM_SRGB: ConvertPixelFromFloat<BC1_UNORM_SRGB>(pDst, srcPixel); break;
693     case BC2_UNORM_SRGB: ConvertPixelFromFloat<BC2_UNORM_SRGB>(pDst, srcPixel); break;
694     case BC3_UNORM_SRGB: ConvertPixelFromFloat<BC3_UNORM_SRGB>(pDst, srcPixel); break;
695     case YCRCB_SWAPUV: ConvertPixelFromFloat<YCRCB_SWAPUV>(pDst, srcPixel); break;
696     case R8G8B8_UNORM: ConvertPixelFromFloat<R8G8B8_UNORM>(pDst, srcPixel); break;
697     case R8G8B8_SNORM: ConvertPixelFromFloat<R8G8B8_SNORM>(pDst, srcPixel); break;
698     case R8G8B8_SSCALED: ConvertPixelFromFloat<R8G8B8_SSCALED>(pDst, srcPixel); break;
699     case R8G8B8_USCALED: ConvertPixelFromFloat<R8G8B8_USCALED>(pDst, srcPixel); break;
700     case BC4_SNORM: ConvertPixelFromFloat<BC4_SNORM>(pDst, srcPixel); break;
701     case BC5_SNORM: ConvertPixelFromFloat<BC5_SNORM>(pDst, srcPixel); break;
702     case R16G16B16_FLOAT: ConvertPixelFromFloat<R16G16B16_FLOAT>(pDst, srcPixel); break;
703     case R16G16B16_UNORM: ConvertPixelFromFloat<R16G16B16_UNORM>(pDst, srcPixel); break;
704     case R16G16B16_SNORM: ConvertPixelFromFloat<R16G16B16_SNORM>(pDst, srcPixel); break;
705     case R16G16B16_SSCALED: ConvertPixelFromFloat<R16G16B16_SSCALED>(pDst, srcPixel); break;
706     case R16G16B16_USCALED: ConvertPixelFromFloat<R16G16B16_USCALED>(pDst, srcPixel); break;
707     case BC6H_SF16: ConvertPixelFromFloat<BC6H_SF16>(pDst, srcPixel); break;
708     case BC7_UNORM: ConvertPixelFromFloat<BC7_UNORM>(pDst, srcPixel); break;
709     case BC7_UNORM_SRGB: ConvertPixelFromFloat<BC7_UNORM_SRGB>(pDst, srcPixel); break;
710     case BC6H_UF16: ConvertPixelFromFloat<BC6H_UF16>(pDst, srcPixel); break;
711     case R8G8B8_UNORM_SRGB: ConvertPixelFromFloat<R8G8B8_UNORM_SRGB>(pDst, srcPixel); break;
712     case R16G16B16_UINT: ConvertPixelFromFloat<R16G16B16_UINT>(pDst, srcPixel); break;
713     case R16G16B16_SINT: ConvertPixelFromFloat<R16G16B16_SINT>(pDst, srcPixel); break;
714     case R10G10B10A2_SNORM: ConvertPixelFromFloat<R10G10B10A2_SNORM>(pDst, srcPixel); break;
715     case R10G10B10A2_USCALED: ConvertPixelFromFloat<R10G10B10A2_USCALED>(pDst, srcPixel); break;
716     case R10G10B10A2_SSCALED: ConvertPixelFromFloat<R10G10B10A2_SSCALED>(pDst, srcPixel); break;
717     case R10G10B10A2_SINT: ConvertPixelFromFloat<R10G10B10A2_SINT>(pDst, srcPixel); break;
718     case B10G10R10A2_SNORM: ConvertPixelFromFloat<B10G10R10A2_SNORM>(pDst, srcPixel); break;
719     case B10G10R10A2_USCALED: ConvertPixelFromFloat<B10G10R10A2_USCALED>(pDst, srcPixel); break;
720     case B10G10R10A2_SSCALED: ConvertPixelFromFloat<B10G10R10A2_SSCALED>(pDst, srcPixel); break;
721     case B10G10R10A2_UINT: ConvertPixelFromFloat<B10G10R10A2_UINT>(pDst, srcPixel); break;
722     case B10G10R10A2_SINT: ConvertPixelFromFloat<B10G10R10A2_SINT>(pDst, srcPixel); break;
723     case R8G8B8_UINT: ConvertPixelFromFloat<R8G8B8_UINT>(pDst, srcPixel); break;
724     case R8G8B8_SINT: ConvertPixelFromFloat<R8G8B8_SINT>(pDst, srcPixel); break;
725     case RAW: ConvertPixelFromFloat<RAW>(pDst, srcPixel); break;
726     default:
727         SWR_INVALID("Invalid format: %d", format);
728         break;
729     }
730 }
731