1 //
2 // Copyright 2014 The ANGLE Project Authors. All rights reserved.
3 // Use of this source code is governed by a BSD-style license that can be
4 // found in the LICENSE file.
5 //
6 
7 // copyvertex.inc.h: Implementation of vertex buffer copying and conversion functions
8 
9 namespace rx
10 {
11 
12 template <typename T,
13           size_t inputComponentCount,
14           size_t outputComponentCount,
15           uint32_t alphaDefaultValueBits>
CopyNativeVertexData(const uint8_t * input,size_t stride,size_t count,uint8_t * output)16 inline void CopyNativeVertexData(const uint8_t *input, size_t stride, size_t count, uint8_t *output)
17 {
18     const size_t attribSize = sizeof(T) * inputComponentCount;
19 
20     if (attribSize == stride && inputComponentCount == outputComponentCount)
21     {
22         memcpy(output, input, count * attribSize);
23         return;
24     }
25 
26     if (inputComponentCount == outputComponentCount)
27     {
28         for (size_t i = 0; i < count; i++)
29         {
30             const T *offsetInput = reinterpret_cast<const T *>(input + (i * stride));
31             T *offsetOutput      = reinterpret_cast<T *>(output) + i * outputComponentCount;
32 
33             memcpy(offsetOutput, offsetInput, attribSize);
34         }
35         return;
36     }
37 
38     const T defaultAlphaValue                = gl::bitCast<T>(alphaDefaultValueBits);
39     const size_t lastNonAlphaOutputComponent = std::min<size_t>(outputComponentCount, 3);
40 
41     for (size_t i = 0; i < count; i++)
42     {
43         const T *offsetInput = reinterpret_cast<const T *>(input + (i * stride));
44         T *offsetOutput      = reinterpret_cast<T *>(output) + i * outputComponentCount;
45 
46         memcpy(offsetOutput, offsetInput, attribSize);
47 
48         if (inputComponentCount < lastNonAlphaOutputComponent)
49         {
50             // Set the remaining G/B channels to 0.
51             size_t numComponents = (lastNonAlphaOutputComponent - inputComponentCount);
52             memset(&offsetOutput[inputComponentCount], 0, numComponents * sizeof(T));
53         }
54 
55         if (inputComponentCount < outputComponentCount && outputComponentCount == 4)
56         {
57             // Set the remaining alpha channel to the defaultAlphaValue.
58             offsetOutput[3] = defaultAlphaValue;
59         }
60     }
61 }
62 
63 template <size_t inputComponentCount, size_t outputComponentCount>
Copy8SintTo16SintVertexData(const uint8_t * input,size_t stride,size_t count,uint8_t * output)64 inline void Copy8SintTo16SintVertexData(const uint8_t *input,
65                                         size_t stride,
66                                         size_t count,
67                                         uint8_t *output)
68 {
69     const size_t lastNonAlphaOutputComponent = std::min<size_t>(outputComponentCount, 3);
70 
71     for (size_t i = 0; i < count; i++)
72     {
73         const GLbyte *offsetInput = reinterpret_cast<const GLbyte *>(input + i * stride);
74         GLshort *offsetOutput     = reinterpret_cast<GLshort *>(output) + i * outputComponentCount;
75 
76         for (size_t j = 0; j < inputComponentCount; j++)
77         {
78             offsetOutput[j] = static_cast<GLshort>(offsetInput[j]);
79         }
80 
81         for (size_t j = inputComponentCount; j < lastNonAlphaOutputComponent; j++)
82         {
83             // Set remaining G/B channels to 0.
84             offsetOutput[j] = 0;
85         }
86 
87         if (inputComponentCount < outputComponentCount && outputComponentCount == 4)
88         {
89             // On integer formats, we must set the Alpha channel to 1 if it's unused.
90             offsetOutput[3] = 1;
91         }
92     }
93 }
94 
95 template <size_t inputComponentCount, size_t outputComponentCount>
Copy8SnormTo16SnormVertexData(const uint8_t * input,size_t stride,size_t count,uint8_t * output)96 inline void Copy8SnormTo16SnormVertexData(const uint8_t *input,
97                                           size_t stride,
98                                           size_t count,
99                                           uint8_t *output)
100 {
101     for (size_t i = 0; i < count; i++)
102     {
103         const GLbyte *offsetInput = reinterpret_cast<const GLbyte *>(input + i * stride);
104         GLshort *offsetOutput     = reinterpret_cast<GLshort *>(output) + i * outputComponentCount;
105 
106         for (size_t j = 0; j < inputComponentCount; j++)
107         {
108             // The original GLbyte value ranges from -128 to +127 (INT8_MAX).
109             // When converted to GLshort, the value must be scaled to between -32768 and +32767
110             // (INT16_MAX).
111             if (offsetInput[j] > 0)
112             {
113                 offsetOutput[j] =
114                     offsetInput[j] << 8 | offsetInput[j] << 1 | ((offsetInput[j] & 0x40) >> 6);
115             }
116             else
117             {
118                 offsetOutput[j] = offsetInput[j] << 8;
119             }
120         }
121 
122         for (size_t j = inputComponentCount; j < std::min<size_t>(outputComponentCount, 3); j++)
123         {
124             // Set remaining G/B channels to 0.
125             offsetOutput[j] = 0;
126         }
127 
128         if (inputComponentCount < outputComponentCount && outputComponentCount == 4)
129         {
130             // On normalized formats, we must set the Alpha channel to the max value if it's unused.
131             offsetOutput[3] = INT16_MAX;
132         }
133     }
134 }
135 
136 template <size_t inputComponentCount, size_t outputComponentCount>
Copy32FixedTo32FVertexData(const uint8_t * input,size_t stride,size_t count,uint8_t * output)137 inline void Copy32FixedTo32FVertexData(const uint8_t *input,
138                                        size_t stride,
139                                        size_t count,
140                                        uint8_t *output)
141 {
142     static const float divisor = 1.0f / (1 << 16);
143 
144     for (size_t i = 0; i < count; i++)
145     {
146         const uint8_t *offsetInput = input + i * stride;
147         float *offsetOutput        = reinterpret_cast<float *>(output) + i * outputComponentCount;
148 
149         // GLfixed access must be 4-byte aligned on arm32, input and stride sometimes are not
150         if (reinterpret_cast<uintptr_t>(offsetInput) % sizeof(GLfixed) == 0)
151         {
152             for (size_t j = 0; j < inputComponentCount; j++)
153             {
154                 offsetOutput[j] =
155                     static_cast<float>(reinterpret_cast<const GLfixed *>(offsetInput)[j]) * divisor;
156             }
157         }
158         else
159         {
160             for (size_t j = 0; j < inputComponentCount; j++)
161             {
162                 GLfixed alignedInput;
163                 memcpy(&alignedInput, offsetInput + j * sizeof(GLfixed), sizeof(GLfixed));
164                 offsetOutput[j] = static_cast<float>(alignedInput) * divisor;
165             }
166         }
167 
168         // 4-component output formats would need special padding in the alpha channel.
169         static_assert(!(inputComponentCount < 4 && outputComponentCount == 4),
170                       "An inputComponentCount less than 4 and an outputComponentCount equal to 4 "
171                       "is not supported.");
172 
173         for (size_t j = inputComponentCount; j < outputComponentCount; j++)
174         {
175             offsetOutput[j] = 0.0f;
176         }
177     }
178 }
179 
180 template <typename T,
181           size_t inputComponentCount,
182           size_t outputComponentCount,
183           bool normalized,
184           bool toHalf>
CopyToFloatVertexData(const uint8_t * input,size_t stride,size_t count,uint8_t * output)185 inline void CopyToFloatVertexData(const uint8_t *input,
186                                   size_t stride,
187                                   size_t count,
188                                   uint8_t *output)
189 {
190     typedef std::numeric_limits<T> NL;
191     typedef typename std::conditional<toHalf, GLhalf, float>::type outputType;
192 
193     for (size_t i = 0; i < count; i++)
194     {
195         const T *offsetInput = reinterpret_cast<const T *>(input + (stride * i));
196         outputType *offsetOutput =
197             reinterpret_cast<outputType *>(output) + i * outputComponentCount;
198 
199         for (size_t j = 0; j < inputComponentCount; j++)
200         {
201             float result = 0;
202 
203             if (normalized)
204             {
205                 if (NL::is_signed)
206                 {
207                     result = static_cast<float>(offsetInput[j]) / static_cast<float>(NL::max());
208                     result = result >= -1.0f ? result : -1.0f;
209                 }
210                 else
211                 {
212                     result = static_cast<float>(offsetInput[j]) / static_cast<float>(NL::max());
213                 }
214             }
215             else
216             {
217                 result = static_cast<float>(offsetInput[j]);
218             }
219 
220             if (toHalf)
221             {
222                 offsetOutput[j] = gl::float32ToFloat16(result);
223             }
224             else
225             {
226                 offsetOutput[j] = static_cast<outputType>(result);
227             }
228         }
229 
230         for (size_t j = inputComponentCount; j < outputComponentCount; j++)
231         {
232             offsetOutput[j] = 0;
233         }
234 
235         if (inputComponentCount < 4 && outputComponentCount == 4)
236         {
237             if (toHalf)
238             {
239                 offsetOutput[3] = gl::Float16One;
240             }
241             else
242             {
243                 offsetOutput[3] = static_cast<outputType>(gl::Float32One);
244             }
245         }
246     }
247 }
248 
249 template <size_t inputComponentCount, size_t outputComponentCount>
Copy32FTo16FVertexData(const uint8_t * input,size_t stride,size_t count,uint8_t * output)250 void Copy32FTo16FVertexData(const uint8_t *input, size_t stride, size_t count, uint8_t *output)
251 {
252     const unsigned short kZero = gl::float32ToFloat16(0.0f);
253     const unsigned short kOne  = gl::float32ToFloat16(1.0f);
254 
255     for (size_t i = 0; i < count; i++)
256     {
257         const float *offsetInput = reinterpret_cast<const float *>(input + (stride * i));
258         unsigned short *offsetOutput =
259             reinterpret_cast<unsigned short *>(output) + i * outputComponentCount;
260 
261         for (size_t j = 0; j < inputComponentCount; j++)
262         {
263             offsetOutput[j] = gl::float32ToFloat16(offsetInput[j]);
264         }
265 
266         for (size_t j = inputComponentCount; j < outputComponentCount; j++)
267         {
268             offsetOutput[j] = (j == 3) ? kOne : kZero;
269         }
270     }
271 }
272 
CopyXYZ32FToXYZ9E5(const uint8_t * input,size_t stride,size_t count,uint8_t * output)273 inline void CopyXYZ32FToXYZ9E5(const uint8_t *input, size_t stride, size_t count, uint8_t *output)
274 {
275     for (size_t i = 0; i < count; i++)
276     {
277         const float *offsetInput   = reinterpret_cast<const float *>(input + (stride * i));
278         unsigned int *offsetOutput = reinterpret_cast<unsigned int *>(output) + i;
279 
280         *offsetOutput = gl::convertRGBFloatsTo999E5(offsetInput[0], offsetInput[1], offsetInput[2]);
281     }
282 }
283 
CopyXYZ32FToX11Y11B10F(const uint8_t * input,size_t stride,size_t count,uint8_t * output)284 inline void CopyXYZ32FToX11Y11B10F(const uint8_t *input,
285                                    size_t stride,
286                                    size_t count,
287                                    uint8_t *output)
288 {
289     for (size_t i = 0; i < count; i++)
290     {
291         const float *offsetInput   = reinterpret_cast<const float *>(input + (stride * i));
292         unsigned int *offsetOutput = reinterpret_cast<unsigned int *>(output) + i;
293 
294         *offsetOutput = gl::float32ToFloat11(offsetInput[0]) << 0 |
295                         gl::float32ToFloat11(offsetInput[1]) << 11 |
296                         gl::float32ToFloat10(offsetInput[2]) << 22;
297     }
298 }
299 
300 namespace priv
301 {
302 
303 template <bool isSigned, bool normalized, bool toFloat, bool toHalf>
CopyPackedRGB(uint32_t data,uint8_t * output)304 static inline void CopyPackedRGB(uint32_t data, uint8_t *output)
305 {
306     const uint32_t rgbSignMask  = 0x200;       // 1 set at the 9 bit
307     const uint32_t negativeMask = 0xFFFFFC00;  // All bits from 10 to 31 set to 1
308 
309     if (toFloat || toHalf)
310     {
311         GLfloat finalValue = static_cast<GLfloat>(data);
312         if (isSigned)
313         {
314             if (data & rgbSignMask)
315             {
316                 int negativeNumber = data | negativeMask;
317                 finalValue         = static_cast<GLfloat>(negativeNumber);
318             }
319 
320             if (normalized)
321             {
322                 const int32_t maxValue = 0x1FF;       // 1 set in bits 0 through 8
323                 const int32_t minValue = 0xFFFFFE01;  // Inverse of maxValue
324 
325                 // A 10-bit two's complement number has the possibility of being minValue - 1 but
326                 // OpenGL's normalization rules dictate that it should be clamped to minValue in
327                 // this case.
328                 if (finalValue < minValue)
329                 {
330                     finalValue = minValue;
331                 }
332 
333                 const int32_t halfRange = (maxValue - minValue) >> 1;
334                 finalValue              = ((finalValue - minValue) / halfRange) - 1.0f;
335             }
336         }
337         else
338         {
339             if (normalized)
340             {
341                 const uint32_t maxValue = 0x3FF;  // 1 set in bits 0 through 9
342                 finalValue /= static_cast<GLfloat>(maxValue);
343             }
344         }
345 
346         if (toHalf)
347         {
348             *reinterpret_cast<GLhalf *>(output) = gl::float32ToFloat16(finalValue);
349         }
350         else
351         {
352             *reinterpret_cast<GLfloat *>(output) = finalValue;
353         }
354     }
355     else
356     {
357         if (isSigned)
358         {
359             GLshort *intOutput = reinterpret_cast<GLshort *>(output);
360 
361             if (data & rgbSignMask)
362             {
363                 *intOutput = static_cast<GLshort>(data | negativeMask);
364             }
365             else
366             {
367                 *intOutput = static_cast<GLshort>(data);
368             }
369         }
370         else
371         {
372             GLushort *uintOutput = reinterpret_cast<GLushort *>(output);
373             *uintOutput          = static_cast<GLushort>(data);
374         }
375     }
376 }
377 
378 template <bool isSigned, bool normalized, bool toFloat, bool toHalf>
CopyPackedAlpha(uint32_t data,uint8_t * output)379 inline void CopyPackedAlpha(uint32_t data, uint8_t *output)
380 {
381     ASSERT(data >= 0 && data <= 3);
382 
383     if (toFloat || toHalf)
384     {
385         GLfloat finalValue = 0;
386         if (isSigned)
387         {
388             if (normalized)
389             {
390                 switch (data)
391                 {
392                     case 0x0:
393                         finalValue = 0.0f;
394                         break;
395                     case 0x1:
396                         finalValue = 1.0f;
397                         break;
398                     case 0x2:
399                         finalValue = -1.0f;
400                         break;
401                     case 0x3:
402                         finalValue = -1.0f;
403                         break;
404                     default:
405                         UNREACHABLE();
406                 }
407             }
408             else
409             {
410                 switch (data)
411                 {
412                     case 0x0:
413                         finalValue = 0.0f;
414                         break;
415                     case 0x1:
416                         finalValue = 1.0f;
417                         break;
418                     case 0x2:
419                         finalValue = -2.0f;
420                         break;
421                     case 0x3:
422                         finalValue = -1.0f;
423                         break;
424                     default:
425                         UNREACHABLE();
426                 }
427             }
428         }
429         else
430         {
431             if (normalized)
432             {
433                 finalValue = data / 3.0f;
434             }
435             else
436             {
437                 finalValue = static_cast<float>(data);
438             }
439         }
440 
441         if (toHalf)
442         {
443             *reinterpret_cast<GLhalf *>(output) = gl::float32ToFloat16(finalValue);
444         }
445         else
446         {
447             *reinterpret_cast<GLfloat *>(output) = finalValue;
448         }
449     }
450     else
451     {
452         if (isSigned)
453         {
454             GLshort *intOutput = reinterpret_cast<GLshort *>(output);
455             switch (data)
456             {
457                 case 0x0:
458                     *intOutput = 0;
459                     break;
460                 case 0x1:
461                     *intOutput = 1;
462                     break;
463                 case 0x2:
464                     *intOutput = -2;
465                     break;
466                 case 0x3:
467                     *intOutput = -1;
468                     break;
469                 default:
470                     UNREACHABLE();
471             }
472         }
473         else
474         {
475             *reinterpret_cast<GLushort *>(output) = static_cast<GLushort>(data);
476         }
477     }
478 }
479 
480 }  // namespace priv
481 
482 template <bool isSigned, bool normalized, bool toFloat, bool toHalf>
CopyXYZ10W2ToXYZWFloatVertexData(const uint8_t * input,size_t stride,size_t count,uint8_t * output)483 inline void CopyXYZ10W2ToXYZWFloatVertexData(const uint8_t *input,
484                                              size_t stride,
485                                              size_t count,
486                                              uint8_t *output)
487 {
488     const size_t outputComponentSize = toFloat && !toHalf ? 4 : 2;
489     const size_t componentCount      = 4;
490 
491     const uint32_t rgbMask  = 0x3FF;  // 1 set in bits 0 through 9
492     const size_t redShift   = 0;      // red is bits 0 through 9
493     const size_t greenShift = 10;     // green is bits 10 through 19
494     const size_t blueShift  = 20;     // blue is bits 20 through 29
495 
496     const uint32_t alphaMask = 0x3;  // 1 set in bits 0 and 1
497     const size_t alphaShift  = 30;   // Alpha is the 30 and 31 bits
498 
499     for (size_t i = 0; i < count; i++)
500     {
501         GLuint packedValue    = *reinterpret_cast<const GLuint *>(input + (i * stride));
502         uint8_t *offsetOutput = output + (i * outputComponentSize * componentCount);
503 
504         priv::CopyPackedRGB<isSigned, normalized, toFloat, toHalf>(
505             (packedValue >> redShift) & rgbMask, offsetOutput + (0 * outputComponentSize));
506         priv::CopyPackedRGB<isSigned, normalized, toFloat, toHalf>(
507             (packedValue >> greenShift) & rgbMask, offsetOutput + (1 * outputComponentSize));
508         priv::CopyPackedRGB<isSigned, normalized, toFloat, toHalf>(
509             (packedValue >> blueShift) & rgbMask, offsetOutput + (2 * outputComponentSize));
510         priv::CopyPackedAlpha<isSigned, normalized, toFloat, toHalf>(
511             (packedValue >> alphaShift) & alphaMask, offsetOutput + (3 * outputComponentSize));
512     }
513 }
514 
515 template <bool isSigned, bool normalized, bool toHalf>
CopyXYZ10ToXYZWFloatVertexData(const uint8_t * input,size_t stride,size_t count,uint8_t * output)516 inline void CopyXYZ10ToXYZWFloatVertexData(const uint8_t *input,
517                                            size_t stride,
518                                            size_t count,
519                                            uint8_t *output)
520 {
521     const size_t outputComponentSize = toHalf ? 2 : 4;
522     const size_t componentCount      = 4;
523 
524     const uint32_t rgbMask  = 0x3FF;  // 1 set in bits 0 through 9
525     const size_t redShift   = 22;     // red is bits 22 through 31
526     const size_t greenShift = 12;     // green is bits 12 through 21
527     const size_t blueShift  = 2;      // blue is bits 2 through 11
528 
529     const uint32_t alphaDefaultValueBits = normalized ? (isSigned ? 0x1 : 0x3) : 0x1;
530 
531     for (size_t i = 0; i < count; i++)
532     {
533         GLuint packedValue    = *reinterpret_cast<const GLuint *>(input + (i * stride));
534         uint8_t *offsetOutput = output + (i * outputComponentSize * componentCount);
535 
536         priv::CopyPackedRGB<isSigned, normalized, true, toHalf>(
537             (packedValue >> redShift) & rgbMask, offsetOutput + (0 * outputComponentSize));
538         priv::CopyPackedRGB<isSigned, normalized, true, toHalf>(
539             (packedValue >> greenShift) & rgbMask, offsetOutput + (1 * outputComponentSize));
540         priv::CopyPackedRGB<isSigned, normalized, true, toHalf>(
541             (packedValue >> blueShift) & rgbMask, offsetOutput + (2 * outputComponentSize));
542         priv::CopyPackedAlpha<isSigned, normalized, true, toHalf>(
543             alphaDefaultValueBits, offsetOutput + (3 * outputComponentSize));
544     }
545 }
546 
547 template <bool isSigned, bool normalized, bool toHalf>
CopyW2XYZ10ToXYZWFloatVertexData(const uint8_t * input,size_t stride,size_t count,uint8_t * output)548 inline void CopyW2XYZ10ToXYZWFloatVertexData(const uint8_t *input,
549                                              size_t stride,
550                                              size_t count,
551                                              uint8_t *output)
552 {
553     const size_t outputComponentSize = toHalf ? 2 : 4;
554     const size_t componentCount      = 4;
555 
556     const uint32_t rgbMask  = 0x3FF;  // 1 set in bits 0 through 9
557     const size_t redShift   = 22;     // red is bits 22 through 31
558     const size_t greenShift = 12;     // green is bits 12 through 21
559     const size_t blueShift  = 2;      // blue is bits 2 through 11
560 
561     const uint32_t alphaMask = 0x3;  // 1 set in bits 0 and 1
562     const size_t alphaShift  = 0;    // Alpha is the 30 and 31 bits
563 
564     for (size_t i = 0; i < count; i++)
565     {
566         GLuint packedValue    = *reinterpret_cast<const GLuint *>(input + (i * stride));
567         uint8_t *offsetOutput = output + (i * outputComponentSize * componentCount);
568 
569         priv::CopyPackedRGB<isSigned, normalized, true, toHalf>(
570             (packedValue >> redShift) & rgbMask, offsetOutput + (0 * outputComponentSize));
571         priv::CopyPackedRGB<isSigned, normalized, true, toHalf>(
572             (packedValue >> greenShift) & rgbMask, offsetOutput + (1 * outputComponentSize));
573         priv::CopyPackedRGB<isSigned, normalized, true, toHalf>(
574             (packedValue >> blueShift) & rgbMask, offsetOutput + (2 * outputComponentSize));
575         priv::CopyPackedAlpha<isSigned, normalized, true, toHalf>(
576             (packedValue >> alphaShift) & alphaMask, offsetOutput + (3 * outputComponentSize));
577     }
578 }
579 }  // namespace rx
580