1 /****************************************************************************
2 * Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * @file TilingFunctions.h
24 *
25 * @brief Tiling functions.
26 *
27 ******************************************************************************/
28 #pragma once
29 
#include "core/state.h"
#include "core/format_traits.h"
#include "memory/tilingtraits.h"

#include <algorithm>
#include <cstring>
35 
// Maximum LOD count.
// NOTE(review): presumably the size of per-LOD tables such as the surface
// state's lodOffsets - confirm against core/state.h.
#define MAX_NUM_LOD 15

// Rounds x up to the next multiple of a by adding (a - 1) and then
// subtracting the low bits selected by the mask (a - 1); the mask only works
// when a is a power of two.
// Both arguments are expanded more than once, so they must be free of side
// effects.
// Alt implementation with bitwise not (~) has issue with uint32 align used
// with 64-bit value, since ~'ed value will remain 32-bit.
#define GFX_ALIGN(x, a) (((x) + ((a) - 1)) - (((x) + ((a) - 1)) & ((a) - 1)))
39 
40 //////////////////////////////////////////////////////////////////////////
41 /// SimdTile SSE(2x2), AVX(4x2), or AVX-512(4x4?)
42 //////////////////////////////////////////////////////////////////////////
43 template<SWR_FORMAT HotTileFormat, SWR_FORMAT SrcOrDstFormat>
44 struct SimdTile
45 {
46     // SimdTile is SOA (e.g. rrrrrrrr gggggggg bbbbbbbb aaaaaaaa )
47     float color[FormatTraits<HotTileFormat>::numComps][KNOB_SIMD_WIDTH];
48 
49     //////////////////////////////////////////////////////////////////////////
50     /// @brief Retrieve color from simd.
51     /// @param index - linear index to color within simd.
52     /// @param outputColor - output color
GetSwizzledColorSimdTile53     INLINE void GetSwizzledColor(
54         uint32_t index,
55         float outputColor[4])
56     {
57         // SOA pattern for 2x2 is a subset of 4x2.
58         //   0 1 4 5
59         //   2 3 6 7
60         // The offset converts pattern to linear
61 #if (SIMD_TILE_X_DIM == 4)
62         static const uint32_t offset[] = { 0, 1, 4, 5, 2, 3, 6, 7 };
63 #elif (SIMD_TILE_X_DIM == 2)
64         static const uint32_t offset[] = { 0, 1, 2, 3 };
65 #endif
66 
67         for (uint32_t i = 0; i < FormatTraits<SrcOrDstFormat>::numComps; ++i)
68         {
69             outputColor[i] = this->color[FormatTraits<SrcOrDstFormat>::swizzle(i)][offset[index]];
70         }
71     }
72 
73     //////////////////////////////////////////////////////////////////////////
74     /// @brief Retrieve color from simd.
75     /// @param index - linear index to color within simd.
76     /// @param outputColor - output color
SetSwizzledColorSimdTile77     INLINE void SetSwizzledColor(
78         uint32_t index,
79         const float src[4])
80     {
81         // SOA pattern for 2x2 is a subset of 4x2.
82         //   0 1 4 5
83         //   2 3 6 7
84         // The offset converts pattern to linear
85 #if (SIMD_TILE_X_DIM == 4)
86         static const uint32_t offset[] = { 0, 1, 4, 5, 2, 3, 6, 7 };
87 #elif (SIMD_TILE_X_DIM == 2)
88         static const uint32_t offset[] = { 0, 1, 2, 3 };
89 #endif
90 
91         // Only loop over the components needed for destination.
92         for (uint32_t i = 0; i < FormatTraits<SrcOrDstFormat>::numComps; ++i)
93         {
94             this->color[i][offset[index]] = src[i];
95         }
96     }
97 };
98 
99 template<>
100 struct SimdTile <R8_UINT,R8_UINT>
101 {
102     // SimdTile is SOA (e.g. rrrrrrrr gggggggg bbbbbbbb aaaaaaaa )
103     uint8_t color[FormatTraits<R8_UINT>::numComps][KNOB_SIMD_WIDTH];
104 
105     //////////////////////////////////////////////////////////////////////////
106     /// @brief Retrieve color from simd.
107     /// @param index - linear index to color within simd.
108     /// @param outputColor - output color
109     INLINE void GetSwizzledColor(
110         uint32_t index,
111         float outputColor[4])
112     {
113         // SOA pattern for 2x2 is a subset of 4x2.
114         //   0 1 4 5
115         //   2 3 6 7
116         // The offset converts pattern to linear
117 #if (SIMD_TILE_X_DIM == 4)
118         static const uint32_t offset[] = { 0, 1, 4, 5, 2, 3, 6, 7 };
119 #elif (SIMD_TILE_X_DIM == 2)
120         static const uint32_t offset[] = { 0, 1, 2, 3 };
121 #endif
122 
123         for (uint32_t i = 0; i < FormatTraits<R8_UINT>::numComps; ++i)
124         {
125             uint32_t src = this->color[FormatTraits<R8_UINT>::swizzle(i)][offset[index]];
126             outputColor[i] = *(float*)&src;
127         }
128     }
129 
130     //////////////////////////////////////////////////////////////////////////
131     /// @brief Retrieve color from simd.
132     /// @param index - linear index to color within simd.
133     /// @param outputColor - output color
134     INLINE void SetSwizzledColor(
135         uint32_t index,
136         const float src[4])
137     {
138         // SOA pattern for 2x2 is a subset of 4x2.
139         //   0 1 4 5
140         //   2 3 6 7
141         // The offset converts pattern to linear
142 #if (SIMD_TILE_X_DIM == 4)
143         static const uint32_t offset[] = { 0, 1, 4, 5, 2, 3, 6, 7 };
144 #elif (SIMD_TILE_X_DIM == 2)
145         static const uint32_t offset[] = { 0, 1, 2, 3 };
146 #endif
147 
148         // Only loop over the components needed for destination.
149         for (uint32_t i = 0; i < FormatTraits<R8_UINT>::numComps; ++i)
150         {
151             this->color[i][offset[index]] = *(uint8_t*)&src[i];
152         }
153     }
154 };
155 
156 #if ENABLE_AVX512_SIMD16
157 //////////////////////////////////////////////////////////////////////////
158 /// SimdTile 8x2 for AVX-512
159 //////////////////////////////////////////////////////////////////////////
160 
161 template<SWR_FORMAT HotTileFormat, SWR_FORMAT SrcOrDstFormat>
162 struct SimdTile_16
163 {
164     // SimdTile is SOA (e.g. rrrrrrrrrrrrrrrr gggggggggggggggg bbbbbbbbbbbbbbbb aaaaaaaaaaaaaaaa )
165     float color[FormatTraits<HotTileFormat>::numComps][KNOB_SIMD16_WIDTH];
166 
167     //////////////////////////////////////////////////////////////////////////
168     /// @brief Retrieve color from simd.
169     /// @param index - linear index to color within simd.
170     /// @param outputColor - output color
171     INLINE void GetSwizzledColor(
172         uint32_t index,
173         float outputColor[4])
174     {
175         // SOA pattern for 8x2..
176         //   0 1 4 5 8 9 C D
177         //   2 3 6 7 A B E F
178         // The offset converts pattern to linear
179         static const uint32_t offset[KNOB_SIMD16_WIDTH] = { 0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15 };
180 
181         for (uint32_t i = 0; i < FormatTraits<SrcOrDstFormat>::numComps; ++i)
182         {
183             outputColor[i] = this->color[FormatTraits<SrcOrDstFormat>::swizzle(i)][offset[index]];
184         }
185     }
186 
187     //////////////////////////////////////////////////////////////////////////
188     /// @brief Retrieve color from simd.
189     /// @param index - linear index to color within simd.
190     /// @param outputColor - output color
191     INLINE void SetSwizzledColor(
192         uint32_t index,
193         const float src[4])
194     {
195         // SOA pattern for 8x2..
196         //   0 1 4 5 8 9 C D
197         //   2 3 6 7 A B E F
198         // The offset converts pattern to linear
199         static const uint32_t offset[KNOB_SIMD16_WIDTH] = { 0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15 };
200 
201         for (uint32_t i = 0; i < FormatTraits<SrcOrDstFormat>::numComps; ++i)
202         {
203             this->color[i][offset[index]] = src[i];
204         }
205     }
206 };
207 
208 template<>
209 struct SimdTile_16 <R8_UINT, R8_UINT>
210 {
211     // SimdTile is SOA (e.g. rrrrrrrrrrrrrrrr gggggggggggggggg bbbbbbbbbbbbbbbb aaaaaaaaaaaaaaaa )
212     uint8_t color[FormatTraits<R8_UINT>::numComps][KNOB_SIMD16_WIDTH];
213 
214     //////////////////////////////////////////////////////////////////////////
215     /// @brief Retrieve color from simd.
216     /// @param index - linear index to color within simd.
217     /// @param outputColor - output color
218     INLINE void GetSwizzledColor(
219         uint32_t index,
220         float outputColor[4])
221     {
222         // SOA pattern for 8x2..
223         //   0 1 4 5 8 9 C D
224         //   2 3 6 7 A B E F
225         // The offset converts pattern to linear
226         static const uint32_t offset[KNOB_SIMD16_WIDTH] = { 0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15 };
227 
228         for (uint32_t i = 0; i < FormatTraits<R8_UINT>::numComps; ++i)
229         {
230             uint32_t src = this->color[FormatTraits<R8_UINT>::swizzle(i)][offset[index]];
231             outputColor[i] = *(float*)&src;
232         }
233     }
234 
235     //////////////////////////////////////////////////////////////////////////
236     /// @brief Retrieve color from simd.
237     /// @param index - linear index to color within simd.
238     /// @param outputColor - output color
239     INLINE void SetSwizzledColor(
240         uint32_t index,
241         const float src[4])
242     {
243         // SOA pattern for 8x2..
244         //   0 1 4 5 8 9 C D
245         //   2 3 6 7 A B E F
246         // The offset converts pattern to linear
247         static const uint32_t offset[KNOB_SIMD16_WIDTH] = { 0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15 };
248 
249         for (uint32_t i = 0; i < FormatTraits<R8_UINT>::numComps; ++i)
250         {
251             this->color[i][offset[index]] = *(uint8_t*)&src[i];
252         }
253     }
254 };
255 
256 #endif
257 //////////////////////////////////////////////////////////////////////////
258 /// @brief Computes lod offset for 1D surface at specified lod.
259 /// @param baseWidth - width of basemip (mip 0).
260 /// @param hAlign - horizontal alignment per miip, in texels
261 /// @param lod - lod index
262 /// @param offset - output offset.
263 INLINE void ComputeLODOffset1D(
264     const SWR_FORMAT_INFO& info,
265     uint32_t baseWidth,
266     uint32_t hAlign,
267     uint32_t lod,
268     uint32_t &offset)
269 {
270     if (lod == 0)
271     {
272         offset = 0;
273     }
274     else
275     {
276         uint32_t curWidth = baseWidth;
277         // @note hAlign is already in blocks for compressed formats so upconvert
278         //       so that we have the desired alignment post-divide.
279         if (info.isBC)
280         {
281             hAlign *= info.bcWidth;
282         }
283 
284         offset = GFX_ALIGN(curWidth, hAlign);
285         for (uint32_t l = 1; l < lod; ++l)
286         {
287             curWidth = std::max<uint32_t>(curWidth >> 1, 1U);
288             offset += GFX_ALIGN(curWidth, hAlign);
289         }
290 
291         if (info.isSubsampled || info.isBC)
292         {
293             offset /= info.bcWidth;
294         }
295     }
296 }
297 
298 //////////////////////////////////////////////////////////////////////////
299 /// @brief Computes x lod offset for 2D surface at specified lod.
300 /// @param baseWidth - width of basemip (mip 0).
301 /// @param hAlign - horizontal alignment per mip, in texels
302 /// @param lod - lod index
303 /// @param offset - output offset.
304 INLINE void ComputeLODOffsetX(
305     const SWR_FORMAT_INFO& info,
306     uint32_t baseWidth,
307     uint32_t hAlign,
308     uint32_t lod,
309     uint32_t &offset)
310 {
311     if (lod < 2)
312     {
313         offset = 0;
314     }
315     else
316     {
317         uint32_t curWidth = baseWidth;
318         // @note hAlign is already in blocks for compressed formats so upconvert
319         //       so that we have the desired alignment post-divide.
320         if (info.isBC)
321         {
322             hAlign *= info.bcWidth;
323         }
324 
325         curWidth = std::max<uint32_t>(curWidth >> 1, 1U);
326         curWidth = GFX_ALIGN(curWidth, hAlign);
327 
328         if (info.isSubsampled || info.isBC)
329         {
330             curWidth /= info.bcWidth;
331         }
332 
333         offset = curWidth;
334     }
335 }
336 
337 //////////////////////////////////////////////////////////////////////////
338 /// @brief Computes y lod offset for 2D surface at specified lod.
339 /// @param baseWidth - width of basemip (mip 0).
340 /// @param vAlign - vertical alignment per mip, in rows
341 /// @param lod - lod index
342 /// @param offset - output offset.
343 INLINE void ComputeLODOffsetY(
344     const SWR_FORMAT_INFO& info,
345     uint32_t baseHeight,
346     uint32_t vAlign,
347     uint32_t lod,
348     uint32_t &offset)
349 {
350     if (lod == 0)
351     {
352         offset = 0;
353     }
354     else
355     {
356         offset = 0;
357         uint32_t mipHeight = baseHeight;
358 
359         // @note vAlign is already in blocks for compressed formats so upconvert
360         //       so that we have the desired alignment post-divide.
361         if (info.isBC)
362         {
363             vAlign *= info.bcHeight;
364         }
365 
366         for (uint32_t l = 1; l <= lod; ++l)
367         {
368             uint32_t alignedMipHeight = GFX_ALIGN(mipHeight, vAlign);
369             offset += ((l != 2) ? alignedMipHeight : 0);
370             mipHeight = std::max<uint32_t>(mipHeight >> 1, 1U);
371         }
372 
373         if (info.isBC)
374         {
375             offset /= info.bcHeight;
376         }
377     }
378 }
379 
380 //////////////////////////////////////////////////////////////////////////
381 /// @brief Computes 1D surface offset
382 /// @param x - offset from start of array slice at given lod.
383 /// @param array - array slice index
384 /// @param lod - lod index
385 /// @param pState - surface state
386 /// @param xOffsetBytes - output offset in bytes.
387 template<bool UseCachedOffsets>
388 INLINE void ComputeSurfaceOffset1D(
389     uint32_t x,
390     uint32_t array,
391     uint32_t lod,
392     const SWR_SURFACE_STATE *pState,
393     uint32_t &xOffsetBytes)
394 {
395     const SWR_FORMAT_INFO &info = GetFormatInfo(pState->format);
396     uint32_t lodOffset;
397 
398     if (UseCachedOffsets)
399     {
400         lodOffset = pState->lodOffsets[0][lod];
401     }
402     else
403     {
404         ComputeLODOffset1D(info, pState->width, pState->halign, lod, lodOffset);
405     }
406 
407     xOffsetBytes = (array * pState->qpitch + lodOffset + x) * info.Bpp;
408 }
409 
410 //////////////////////////////////////////////////////////////////////////
411 /// @brief Adjusts the array slice for legacy TileY MSAA
412 /// @param pState - surface state
413 /// @param array - array slice index
414 /// @param sampleNum - requested sample
415 INLINE void AdjustCoordsForMSAA(const SWR_SURFACE_STATE *pState, uint32_t& x, uint32_t& y, uint32_t& arrayIndex, uint32_t sampleNum)
416 {
417     /// @todo: might want to templatize adjusting for sample slices when we support tileYS/tileYF.
418     if((pState->tileMode == SWR_TILE_MODE_YMAJOR ||
419         pState->tileMode == SWR_TILE_MODE_WMAJOR) &&
420        pState->bInterleavedSamples)
421     {
422         uint32_t newX, newY, newSampleX, newSampleY;
423         switch(pState->numSamples)
424         {
425         case 1:
426             newX = x;
427             newY = y;
428             newSampleX = newSampleY = 0;
429             break;
430         case 2:
431         {
432             assert(pState->type == SURFACE_2D);
433             static const uint32_t xMask = 0xFFFFFFFD;
434             static const uint32_t sampleMaskX = 0x1;
435             newX = pdep_u32(x, xMask);
436             newY = y;
437             newSampleX = pext_u32(sampleNum, sampleMaskX);
438             newSampleY = 0;
439         }
440             break;
441         case 4:
442         {
443             assert(pState->type == SURFACE_2D);
444             static const uint32_t mask = 0xFFFFFFFD;
445             static const uint32_t sampleMaskX = 0x1;
446             static const uint32_t sampleMaskY = 0x2;
447             newX = pdep_u32(x, mask);
448             newY = pdep_u32(y, mask);
449             newSampleX = pext_u32(sampleNum, sampleMaskX);
450             newSampleY = pext_u32(sampleNum, sampleMaskY);
451         }
452             break;
453         case 8:
454         {
455             assert(pState->type == SURFACE_2D);
456             static const uint32_t xMask = 0xFFFFFFF9;
457             static const uint32_t yMask = 0xFFFFFFFD;
458             static const uint32_t sampleMaskX = 0x5;
459             static const uint32_t sampleMaskY = 0x2;
460             newX = pdep_u32(x, xMask);
461             newY = pdep_u32(y, yMask);
462             newSampleX = pext_u32(sampleNum, sampleMaskX);
463             newSampleY = pext_u32(sampleNum, sampleMaskY);
464         }
465             break;
466         case 16:
467         {
468             assert(pState->type == SURFACE_2D);
469             static const uint32_t mask = 0xFFFFFFF9;
470             static const uint32_t sampleMaskX = 0x5;
471             static const uint32_t sampleMaskY = 0xA;
472             newX = pdep_u32(x, mask);
473             newY = pdep_u32(y, mask);
474             newSampleX = pext_u32(sampleNum, sampleMaskX);
475             newSampleY = pext_u32(sampleNum, sampleMaskY);
476         }
477             break;
478         default:
479             assert(0 && "Unsupported sample count");
480             newX = newY = 0;
481             newSampleX = newSampleY = 0;
482             break;
483         }
484         x = newX | (newSampleX << 1);
485         y = newY | (newSampleY << 1);
486     }
487     else if(pState->tileMode == SWR_TILE_MODE_YMAJOR ||
488             pState->tileMode == SWR_TILE_NONE)
489     {
490         uint32_t sampleShift;
491         switch(pState->numSamples)
492         {
493         case 1:
494             assert(sampleNum == 0);
495             sampleShift = 0;
496             break;
497         case 2:
498             assert(pState->type == SURFACE_2D);
499             sampleShift = 1;
500             break;
501         case 4:
502             assert(pState->type == SURFACE_2D);
503             sampleShift = 2;
504             break;
505         case 8:
506             assert(pState->type == SURFACE_2D);
507             sampleShift = 3;
508             break;
509         case 16:
510             assert(pState->type == SURFACE_2D);
511             sampleShift = 4;
512             break;
513         default:
514             assert(0 && "Unsupported sample count");
515             sampleShift = 0;
516             break;
517         }
518         arrayIndex = (arrayIndex << sampleShift) | sampleNum;
519     }
520 }
521 
522 //////////////////////////////////////////////////////////////////////////
523 /// @brief Computes 2D surface offset
524 /// @param x - horizontal offset from start of array slice and lod.
525 /// @param y - vertical offset from start of array slice and lod.
526 /// @param array - array slice index
527 /// @param lod - lod index
528 /// @param pState - surface state
529 /// @param xOffsetBytes - output x offset in bytes.
530 /// @param yOffsetRows - output y offset in bytes.
531 template<bool UseCachedOffsets>
532 INLINE void ComputeSurfaceOffset2D(uint32_t x, uint32_t y, uint32_t array, uint32_t sampleNum, uint32_t lod, const SWR_SURFACE_STATE *pState, uint32_t &xOffsetBytes, uint32_t &yOffsetRows)
533 {
534     const SWR_FORMAT_INFO &info = GetFormatInfo(pState->format);
535     uint32_t lodOffsetX, lodOffsetY;
536 
537     if (UseCachedOffsets)
538     {
539         lodOffsetX = pState->lodOffsets[0][lod];
540         lodOffsetY = pState->lodOffsets[1][lod];
541     }
542     else
543     {
544         ComputeLODOffsetX(info, pState->width, pState->halign, lod, lodOffsetX);
545         ComputeLODOffsetY(info, pState->height, pState->valign, lod, lodOffsetY);
546     }
547 
548     AdjustCoordsForMSAA(pState, x, y, array, sampleNum);
549     xOffsetBytes = (x + lodOffsetX + pState->xOffset) * info.Bpp;
550     yOffsetRows = (array * pState->qpitch) + lodOffsetY + y + pState->yOffset;
551 }
552 
553 //////////////////////////////////////////////////////////////////////////
554 /// @brief Computes 3D surface offset
555 /// @param x - horizontal offset from start of array slice and lod.
556 /// @param y - vertical offset from start of array slice and lod.
557 /// @param z - depth offset from start of array slice and lod.
558 /// @param lod - lod index
559 /// @param pState - surface state
560 /// @param xOffsetBytes - output x offset in bytes.
561 /// @param yOffsetRows - output y offset in rows.
562 /// @param zOffsetSlices - output y offset in slices.
563 template<bool UseCachedOffsets>
564 INLINE void ComputeSurfaceOffset3D(uint32_t x, uint32_t y, uint32_t z, uint32_t lod, const SWR_SURFACE_STATE *pState, uint32_t &xOffsetBytes, uint32_t &yOffsetRows, uint32_t &zOffsetSlices)
565 {
566     const SWR_FORMAT_INFO &info = GetFormatInfo(pState->format);
567     uint32_t lodOffsetX, lodOffsetY;
568 
569     if (UseCachedOffsets)
570     {
571         lodOffsetX = pState->lodOffsets[0][lod];
572         lodOffsetY = pState->lodOffsets[1][lod];
573     }
574     else
575     {
576         ComputeLODOffsetX(info, pState->width, pState->halign, lod, lodOffsetX);
577         ComputeLODOffsetY(info, pState->height, pState->valign, lod, lodOffsetY);
578     }
579 
580     xOffsetBytes = (x + lodOffsetX) * info.Bpp;
581     yOffsetRows = lodOffsetY + y;
582     zOffsetSlices = z;
583 }
584 
585 //////////////////////////////////////////////////////////////////////////
586 /// @brief Swizzles the linear x,y offsets depending on surface tiling mode
587 ///        and returns final surface address
588 /// @param xOffsetBytes - x offset from base of surface in bytes
589 /// @param yOffsetRows - y offset from base of surface in rows
590 /// @param pState - pointer to the surface state
591 template<typename TTraits>
592 INLINE uint32_t ComputeTileSwizzle2D(uint32_t xOffsetBytes, uint32_t yOffsetRows, const SWR_SURFACE_STATE *pState)
593 {
594     return ComputeOffset2D<TTraits>(pState->pitch, xOffsetBytes, yOffsetRows);
595 }
596 
597 //////////////////////////////////////////////////////////////////////////
598 /// @brief Swizzles the linear x,y offsets depending on surface tiling mode
599 ///        and returns final surface address
600 /// @param xOffsetBytes - x offset from base of surface in bytes
601 /// @param yOffsetRows - y offset from base of surface in rows
602 /// @param pState - pointer to the surface state
603 template<typename TTraits>
604 INLINE uint32_t ComputeTileSwizzle3D(uint32_t xOffsetBytes, uint32_t yOffsetRows, uint32_t zOffsetSlices, const SWR_SURFACE_STATE *pState)
605 {
606     return ComputeOffset3D<TTraits>(pState->qpitch, pState->pitch, xOffsetBytes, yOffsetRows, zOffsetSlices);
607 }
608 
609 //////////////////////////////////////////////////////////////////////////
610 /// @brief Swizzles the linear x,y offsets depending on surface tiling mode
611 ///        and returns final surface address
612 /// @param xOffsetBytes - x offset from base of surface in bytes
613 /// @param yOffsetRows - y offset from base of surface in rows
614 /// @param pState - pointer to the surface state
615 INLINE
616 uint32_t TileSwizzle2D(uint32_t xOffsetBytes, uint32_t yOffsetRows, const SWR_SURFACE_STATE *pState)
617 {
618     switch (pState->tileMode)
619     {
620     case SWR_TILE_NONE: return ComputeTileSwizzle2D<TilingTraits<SWR_TILE_NONE, 32> >(xOffsetBytes, yOffsetRows, pState);
621     case SWR_TILE_SWRZ: return ComputeTileSwizzle2D<TilingTraits<SWR_TILE_SWRZ, 32> >(xOffsetBytes, yOffsetRows, pState);
622     case SWR_TILE_MODE_XMAJOR: return ComputeTileSwizzle2D<TilingTraits<SWR_TILE_MODE_XMAJOR, 8> >(xOffsetBytes, yOffsetRows, pState);
623     case SWR_TILE_MODE_YMAJOR: return ComputeTileSwizzle2D<TilingTraits<SWR_TILE_MODE_YMAJOR, 32> >(xOffsetBytes, yOffsetRows, pState);
624     case SWR_TILE_MODE_WMAJOR: return ComputeTileSwizzle2D<TilingTraits<SWR_TILE_MODE_WMAJOR, 8> >(xOffsetBytes, yOffsetRows, pState);
625     default: SWR_INVALID("Unsupported tiling mode");
626     }
627     return 0;
628 }
629 
630 //////////////////////////////////////////////////////////////////////////
631 /// @brief Swizzles the linear x,y,z offsets depending on surface tiling mode
632 ///        and returns final surface address
633 /// @param xOffsetBytes - x offset from base of surface in bytes
634 /// @param yOffsetRows - y offset from base of surface in rows
635 /// @param zOffsetSlices - z offset from base of surface in slices
636 /// @param pState - pointer to the surface state
637 INLINE
638 uint32_t TileSwizzle3D(uint32_t xOffsetBytes, uint32_t yOffsetRows, uint32_t zOffsetSlices, const SWR_SURFACE_STATE *pState)
639 {
640     switch (pState->tileMode)
641     {
642     case SWR_TILE_NONE: return ComputeTileSwizzle3D<TilingTraits<SWR_TILE_NONE, 32> >(xOffsetBytes, yOffsetRows, zOffsetSlices, pState);
643     case SWR_TILE_SWRZ: return ComputeTileSwizzle3D<TilingTraits<SWR_TILE_SWRZ, 32> >(xOffsetBytes, yOffsetRows, zOffsetSlices, pState);
644     case SWR_TILE_MODE_YMAJOR: return ComputeTileSwizzle3D<TilingTraits<SWR_TILE_MODE_YMAJOR, 32> >(xOffsetBytes, yOffsetRows, zOffsetSlices, pState);
645     default: SWR_INVALID("Unsupported tiling mode");
646     }
647     return 0;
648 }
649 
650 template<bool UseCachedOffsets>
651 INLINE
652 uint32_t ComputeSurfaceOffset(uint32_t x, uint32_t y, uint32_t z, uint32_t array, uint32_t sampleNum, uint32_t lod, const SWR_SURFACE_STATE *pState)
653 {
654     uint32_t offsetX = 0, offsetY = 0, offsetZ = 0;
655     switch (pState->type)
656     {
657     case SURFACE_BUFFER:
658     case SURFACE_STRUCTURED_BUFFER:
659         offsetX = x * pState->pitch;
660         return offsetX;
661         break;
662     case SURFACE_1D:
663         ComputeSurfaceOffset1D<UseCachedOffsets>(x, array, lod, pState, offsetX);
664         return TileSwizzle2D(offsetX, 0, pState);
665         break;
666     case SURFACE_2D:
667         ComputeSurfaceOffset2D<UseCachedOffsets>(x, y, array, sampleNum, lod, pState, offsetX, offsetY);
668         return TileSwizzle2D(offsetX, offsetY, pState);
669     case SURFACE_3D:
670         ComputeSurfaceOffset3D<UseCachedOffsets>(x, y, z, lod, pState, offsetX, offsetY, offsetZ);
671         return TileSwizzle3D(offsetX, offsetY, offsetZ, pState);
672         break;
673     case SURFACE_CUBE:
674         ComputeSurfaceOffset2D<UseCachedOffsets>(x, y, array, sampleNum, lod, pState, offsetX, offsetY);
675         return TileSwizzle2D(offsetX, offsetY, pState);
676         break;
677     default: SWR_INVALID("Unsupported format");
678     }
679 
680     return 0;
681 }
682 
// Function pointer type whose parameter list and return type match the
// ComputeSurfaceAddress instantiations:
// (x, y, z, array, sampleNum, lod, pState) -> address.
typedef void*(*PFN_COMPUTESURFADDR)(uint32_t, uint32_t, uint32_t, uint32_t, uint32_t, uint32_t, const SWR_SURFACE_STATE*);
684 
685 //////////////////////////////////////////////////////////////////////////
686 /// @brief Computes surface address at the given location and lod
687 /// @param x - x location in pixels
688 /// @param y - y location in rows
689 /// @param z - z location for 3D surfaces
690 /// @param array - array slice for 1D and 2D surfaces
691 /// @param lod - level of detail
692 /// @param pState - pointer to the surface state
693 template<bool UseCachedOffsets, bool IsRead>
694 INLINE
695 void* ComputeSurfaceAddress(uint32_t x, uint32_t y, uint32_t z, uint32_t array, uint32_t sampleNum, uint32_t lod, const SWR_SURFACE_STATE *pState)
696 {
697     return (void*)(pState->xpBaseAddress + ComputeSurfaceOffset<UseCachedOffsets>(x, y, z, array, sampleNum, lod, pState));
698 }
699