1 /*
2  * Copyright © 2017 Advanced Micro Devices, Inc.
3  * All Rights Reserved.
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining
6  * a copy of this software and associated documentation files (the
7  * "Software"), to deal in the Software without restriction, including
8  * without limitation the rights to use, copy, modify, merge, publish,
9  * distribute, sub license, and/or sell copies of the Software, and to
10  * permit persons to whom the Software is furnished to do so, subject to
11  * the following conditions:
12  *
13  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
14  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
15  * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
16  * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
17  * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
20  * USE OR OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * The above copyright notice and this permission notice (including the
23  * next paragraph) shall be included in all copies or substantial portions
24  * of the Software.
25  */
26 
27 /**
28 ************************************************************************************************************************
29 * @file  gfx9addrlib.cpp
30 * @brief Contains the implementation for the Gfx9Lib class.
31 ************************************************************************************************************************
32 */
33 
34 #include "gfx9addrlib.h"
35 
36 #include "gfx9_gb_reg.h"
37 
38 #include "amdgpu_asic_addr.h"
39 
40 ////////////////////////////////////////////////////////////////////////////////////////////////////
41 ////////////////////////////////////////////////////////////////////////////////////////////////////
42 
43 namespace Addr
44 {
45 
46 /**
47 ************************************************************************************************************************
48 *   Gfx9HwlInit
49 *
50 *   @brief
51 *       Creates a Gfx9Lib object.
52 *
53 *   @return
54 *       Returns a Gfx9Lib object pointer.
55 ************************************************************************************************************************
56 */
Gfx9HwlInit(const Client * pClient)57 Addr::Lib* Gfx9HwlInit(const Client* pClient)
58 {
59     return V2::Gfx9Lib::CreateObj(pClient);
60 }
61 
62 namespace V2
63 {
64 
65 ////////////////////////////////////////////////////////////////////////////////////////////////////
66 //                               Static Const Member
67 ////////////////////////////////////////////////////////////////////////////////////////////////////
68 
69 const SwizzleModeFlags Gfx9Lib::SwizzleModeTable[ADDR_SW_MAX_TYPE] =
70 {//Linear 256B  4KB  64KB   Var    Z    Std   Disp  Rot   XOR    T     RtOpt
71     {1,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}, // ADDR_SW_LINEAR
72     {0,    1,    0,    0,    0,    0,    1,    0,    0,    0,    0,    0}, // ADDR_SW_256B_S
73     {0,    1,    0,    0,    0,    0,    0,    1,    0,    0,    0,    0}, // ADDR_SW_256B_D
74     {0,    1,    0,    0,    0,    0,    0,    0,    1,    0,    0,    0}, // ADDR_SW_256B_R
75 
76     {0,    0,    1,    0,    0,    1,    0,    0,    0,    0,    0,    0}, // ADDR_SW_4KB_Z
77     {0,    0,    1,    0,    0,    0,    1,    0,    0,    0,    0,    0}, // ADDR_SW_4KB_S
78     {0,    0,    1,    0,    0,    0,    0,    1,    0,    0,    0,    0}, // ADDR_SW_4KB_D
79     {0,    0,    1,    0,    0,    0,    0,    0,    1,    0,    0,    0}, // ADDR_SW_4KB_R
80 
81     {0,    0,    0,    1,    0,    1,    0,    0,    0,    0,    0,    0}, // ADDR_SW_64KB_Z
82     {0,    0,    0,    1,    0,    0,    1,    0,    0,    0,    0,    0}, // ADDR_SW_64KB_S
83     {0,    0,    0,    1,    0,    0,    0,    1,    0,    0,    0,    0}, // ADDR_SW_64KB_D
84     {0,    0,    0,    1,    0,    0,    0,    0,    1,    0,    0,    0}, // ADDR_SW_64KB_R
85 
86     {0,    0,    0,    0,    1,    1,    0,    0,    0,    0,    0,    0}, // ADDR_SW_VAR_Z
87     {0,    0,    0,    0,    1,    0,    1,    0,    0,    0,    0,    0}, // ADDR_SW_VAR_S
88     {0,    0,    0,    0,    1,    0,    0,    1,    0,    0,    0,    0}, // ADDR_SW_VAR_D
89     {0,    0,    0,    0,    1,    0,    0,    0,    1,    0,    0,    0}, // ADDR_SW_VAR_R
90 
91     {0,    0,    0,    1,    0,    1,    0,    0,    0,    1,    1,    0}, // ADDR_SW_64KB_Z_T
92     {0,    0,    0,    1,    0,    0,    1,    0,    0,    1,    1,    0}, // ADDR_SW_64KB_S_T
93     {0,    0,    0,    1,    0,    0,    0,    1,    0,    1,    1,    0}, // ADDR_SW_64KB_D_T
94     {0,    0,    0,    1,    0,    0,    0,    0,    1,    1,    1,    0}, // ADDR_SW_64KB_R_T
95 
96     {0,    0,    1,    0,    0,    1,    0,    0,    0,    1,    0,    0}, // ADDR_SW_4KB_Z_x
97     {0,    0,    1,    0,    0,    0,    1,    0,    0,    1,    0,    0}, // ADDR_SW_4KB_S_x
98     {0,    0,    1,    0,    0,    0,    0,    1,    0,    1,    0,    0}, // ADDR_SW_4KB_D_x
99     {0,    0,    1,    0,    0,    0,    0,    0,    1,    1,    0,    0}, // ADDR_SW_4KB_R_x
100 
101     {0,    0,    0,    1,    0,    1,    0,    0,    0,    1,    0,    0}, // ADDR_SW_64KB_Z_X
102     {0,    0,    0,    1,    0,    0,    1,    0,    0,    1,    0,    0}, // ADDR_SW_64KB_S_X
103     {0,    0,    0,    1,    0,    0,    0,    1,    0,    1,    0,    0}, // ADDR_SW_64KB_D_X
104     {0,    0,    0,    1,    0,    0,    0,    0,    1,    1,    0,    0}, // ADDR_SW_64KB_R_X
105 
106     {0,    0,    0,    0,    1,    1,    0,    0,    0,    1,    0,    0}, // ADDR_SW_VAR_Z_X
107     {0,    0,    0,    0,    1,    0,    1,    0,    0,    1,    0,    0}, // ADDR_SW_VAR_S_X
108     {0,    0,    0,    0,    1,    0,    0,    1,    0,    1,    0,    0}, // ADDR_SW_VAR_D_X
109     {0,    0,    0,    0,    1,    0,    0,    0,    1,    1,    0,    0}, // ADDR_SW_VAR_R_X
110     {1,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}, // ADDR_SW_LINEAR_GENERAL
111 };
112 
113 const UINT_32 Gfx9Lib::MipTailOffset256B[] = {2048, 1024, 512, 256, 128, 64, 32, 16,
114                                               8, 6, 5, 4, 3, 2, 1, 0};
115 
116 const Dim3d   Gfx9Lib::Block256_3dS[]  = {{16, 4, 4}, {8, 4, 4}, {4, 4, 4}, {2, 4, 4}, {1, 4, 4}};
117 
118 const Dim3d   Gfx9Lib::Block256_3dZ[]  = {{8, 4, 8}, {4, 4, 8}, {4, 4, 4}, {4, 2, 4}, {2, 2, 4}};
119 
120 /**
121 ************************************************************************************************************************
122 *   Gfx9Lib::Gfx9Lib
123 *
124 *   @brief
125 *       Constructor
126 *
127 ************************************************************************************************************************
128 */
Gfx9Lib(const Client * pClient)129 Gfx9Lib::Gfx9Lib(const Client* pClient)
130     :
131     Lib(pClient),
132     m_numEquations(0)
133 {
134     m_class = AI_ADDRLIB;
135     memset(&m_settings, 0, sizeof(m_settings));
136     memcpy(m_swizzleModeTable, SwizzleModeTable, sizeof(SwizzleModeTable));
137 }
138 
139 /**
140 ************************************************************************************************************************
141 *   Gfx9Lib::~Gfx9Lib
142 *
143 *   @brief
144 *       Destructor
145 ************************************************************************************************************************
146 */
~Gfx9Lib()147 Gfx9Lib::~Gfx9Lib()
148 {
149 }
150 
151 /**
152 ************************************************************************************************************************
153 *   Gfx9Lib::HwlComputeHtileInfo
154 *
155 *   @brief
156 *       Interface function stub of AddrComputeHtileInfo
157 *
158 *   @return
159 *       ADDR_E_RETURNCODE
160 ************************************************************************************************************************
161 */
HwlComputeHtileInfo(const ADDR2_COMPUTE_HTILE_INFO_INPUT * pIn,ADDR2_COMPUTE_HTILE_INFO_OUTPUT * pOut) const162 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileInfo(
163     const ADDR2_COMPUTE_HTILE_INFO_INPUT*    pIn,    ///< [in] input structure
164     ADDR2_COMPUTE_HTILE_INFO_OUTPUT*         pOut    ///< [out] output structure
165     ) const
166 {
167     UINT_32 numPipeTotal = GetPipeNumForMetaAddressing(pIn->hTileFlags.pipeAligned,
168                                                        pIn->swizzleMode);
169 
170     UINT_32 numRbTotal = pIn->hTileFlags.rbAligned ? m_se * m_rbPerSe : 1;
171 
172     UINT_32 numCompressBlkPerMetaBlk, numCompressBlkPerMetaBlkLog2;
173 
174     if ((numPipeTotal == 1) && (numRbTotal == 1))
175     {
176         numCompressBlkPerMetaBlkLog2 = 10;
177     }
178     else
179     {
180         if (m_settings.applyAliasFix)
181         {
182             numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + Max(10u, m_pipeInterleaveLog2);
183         }
184         else
185         {
186             numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + 10;
187         }
188     }
189 
190     numCompressBlkPerMetaBlk = 1 << numCompressBlkPerMetaBlkLog2;
191 
192     Dim3d metaBlkDim = {8, 8, 1};
193     UINT_32 totalAmpBits = numCompressBlkPerMetaBlkLog2;
194     UINT_32 widthAmp = (pIn->numMipLevels > 1) ? (totalAmpBits >> 1) : RoundHalf(totalAmpBits);
195     UINT_32 heightAmp = totalAmpBits - widthAmp;
196     metaBlkDim.w <<= widthAmp;
197     metaBlkDim.h <<= heightAmp;
198 
199 #if DEBUG
200     Dim3d metaBlkDimDbg = {8, 8, 1};
201     for (UINT_32 index = 0; index < numCompressBlkPerMetaBlkLog2; index++)
202     {
203         if ((metaBlkDimDbg.h < metaBlkDimDbg.w) ||
204             ((pIn->numMipLevels > 1) && (metaBlkDimDbg.h == metaBlkDimDbg.w)))
205         {
206             metaBlkDimDbg.h <<= 1;
207         }
208         else
209         {
210             metaBlkDimDbg.w <<= 1;
211         }
212     }
213     ADDR_ASSERT((metaBlkDimDbg.w == metaBlkDim.w) && (metaBlkDimDbg.h == metaBlkDim.h));
214 #endif
215 
216     UINT_32 numMetaBlkX;
217     UINT_32 numMetaBlkY;
218     UINT_32 numMetaBlkZ;
219 
220     GetMetaMipInfo(pIn->numMipLevels, &metaBlkDim, FALSE, pOut->pMipInfo,
221                    pIn->unalignedWidth, pIn->unalignedHeight, pIn->numSlices,
222                    &numMetaBlkX, &numMetaBlkY, &numMetaBlkZ);
223 
224     UINT_32 sizeAlign = numPipeTotal * numRbTotal * m_pipeInterleaveBytes;
225 
226     if (m_settings.htileAlignFix)
227     {
228         sizeAlign <<= 1;
229     }
230 
231     pOut->pitch      = numMetaBlkX * metaBlkDim.w;
232     pOut->height     = numMetaBlkY * metaBlkDim.h;
233     pOut->sliceSize  = numMetaBlkX * numMetaBlkY * numCompressBlkPerMetaBlk * 4;
234 
235     pOut->metaBlkWidth = metaBlkDim.w;
236     pOut->metaBlkHeight = metaBlkDim.h;
237     pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY;
238 
239     pOut->baseAlign = Max(numCompressBlkPerMetaBlk * 4, sizeAlign);
240 
241     if (m_settings.metaBaseAlignFix)
242     {
243         pOut->baseAlign = Max(pOut->baseAlign, GetBlockSize(pIn->swizzleMode));
244     }
245 
246     if ((IsXor(pIn->swizzleMode) == FALSE) && (numPipeTotal > 2))
247     {
248         UINT_32 additionalAlign = numPipeTotal * numCompressBlkPerMetaBlk * 2;
249 
250         if (additionalAlign > sizeAlign)
251         {
252             sizeAlign = additionalAlign;
253         }
254     }
255 
256     pOut->htileBytes = PowTwoAlign(pOut->sliceSize * numMetaBlkZ, sizeAlign);
257 
258     return ADDR_OK;
259 }
260 
261 /**
262 ************************************************************************************************************************
263 *   Gfx9Lib::HwlComputeCmaskInfo
264 *
265 *   @brief
266 *       Interface function stub of AddrComputeCmaskInfo
267 *
268 *   @return
269 *       ADDR_E_RETURNCODE
270 ************************************************************************************************************************
271 */
HwlComputeCmaskInfo(const ADDR2_COMPUTE_CMASK_INFO_INPUT * pIn,ADDR2_COMPUTE_CMASK_INFO_OUTPUT * pOut) const272 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeCmaskInfo(
273     const ADDR2_COMPUTE_CMASK_INFO_INPUT*    pIn,    ///< [in] input structure
274     ADDR2_COMPUTE_CMASK_INFO_OUTPUT*         pOut    ///< [out] output structure
275     ) const
276 {
277 // TODO: Clarify with AddrLib team
278 //     ADDR_ASSERT(pIn->resourceType == ADDR_RSRC_TEX_2D);
279 
280     UINT_32 numPipeTotal = GetPipeNumForMetaAddressing(pIn->cMaskFlags.pipeAligned,
281                                                        pIn->swizzleMode);
282 
283     UINT_32 numRbTotal = pIn->cMaskFlags.rbAligned ? m_se * m_rbPerSe : 1;
284 
285     UINT_32 numCompressBlkPerMetaBlkLog2, numCompressBlkPerMetaBlk;
286 
287     if ((numPipeTotal == 1) && (numRbTotal == 1))
288     {
289         numCompressBlkPerMetaBlkLog2 = 13;
290     }
291     else
292     {
293         if (m_settings.applyAliasFix)
294         {
295             numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + Max(10u, m_pipeInterleaveLog2);
296         }
297         else
298         {
299             numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + 10;
300         }
301 
302         numCompressBlkPerMetaBlkLog2 = Max(numCompressBlkPerMetaBlkLog2, 13u);
303     }
304 
305     numCompressBlkPerMetaBlk = 1 << numCompressBlkPerMetaBlkLog2;
306 
307     Dim2d metaBlkDim = {8, 8};
308     UINT_32 totalAmpBits = numCompressBlkPerMetaBlkLog2;
309     UINT_32 heightAmp = totalAmpBits >> 1;
310     UINT_32 widthAmp = totalAmpBits - heightAmp;
311     metaBlkDim.w <<= widthAmp;
312     metaBlkDim.h <<= heightAmp;
313 
314 #if DEBUG
315     Dim2d metaBlkDimDbg = {8, 8};
316     for (UINT_32 index = 0; index < numCompressBlkPerMetaBlkLog2; index++)
317     {
318         if (metaBlkDimDbg.h < metaBlkDimDbg.w)
319         {
320             metaBlkDimDbg.h <<= 1;
321         }
322         else
323         {
324             metaBlkDimDbg.w <<= 1;
325         }
326     }
327     ADDR_ASSERT((metaBlkDimDbg.w == metaBlkDim.w) && (metaBlkDimDbg.h == metaBlkDim.h));
328 #endif
329 
330     UINT_32 numMetaBlkX = (pIn->unalignedWidth  + metaBlkDim.w - 1) / metaBlkDim.w;
331     UINT_32 numMetaBlkY = (pIn->unalignedHeight + metaBlkDim.h - 1) / metaBlkDim.h;
332     UINT_32 numMetaBlkZ = Max(pIn->numSlices, 1u);
333 
334     UINT_32 sizeAlign = numPipeTotal * numRbTotal * m_pipeInterleaveBytes;
335 
336     pOut->pitch      = numMetaBlkX * metaBlkDim.w;
337     pOut->height     = numMetaBlkY * metaBlkDim.h;
338     pOut->sliceSize  = (numMetaBlkX * numMetaBlkY * numCompressBlkPerMetaBlk) >> 1;
339     pOut->cmaskBytes = PowTwoAlign(pOut->sliceSize * numMetaBlkZ, sizeAlign);
340     pOut->baseAlign  = Max(numCompressBlkPerMetaBlk >> 1, sizeAlign);
341 
342     if (m_settings.metaBaseAlignFix)
343     {
344         pOut->baseAlign = Max(pOut->baseAlign, GetBlockSize(pIn->swizzleMode));
345     }
346 
347     pOut->metaBlkWidth = metaBlkDim.w;
348     pOut->metaBlkHeight = metaBlkDim.h;
349 
350     pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY;
351 
352     return ADDR_OK;
353 }
354 
355 /**
356 ************************************************************************************************************************
357 *   Gfx9Lib::GetMetaMipInfo
358 *
359 *   @brief
360 *       Get meta mip info
361 *
362 *   @return
363 *       N/A
364 ************************************************************************************************************************
365 */
GetMetaMipInfo(UINT_32 numMipLevels,Dim3d * pMetaBlkDim,BOOL_32 dataThick,ADDR2_META_MIP_INFO * pInfo,UINT_32 mip0Width,UINT_32 mip0Height,UINT_32 mip0Depth,UINT_32 * pNumMetaBlkX,UINT_32 * pNumMetaBlkY,UINT_32 * pNumMetaBlkZ) const366 VOID Gfx9Lib::GetMetaMipInfo(
367     UINT_32 numMipLevels,           ///< [in]  number of mip levels
368     Dim3d* pMetaBlkDim,             ///< [in]  meta block dimension
369     BOOL_32 dataThick,              ///< [in]  data surface is thick
370     ADDR2_META_MIP_INFO* pInfo,     ///< [out] meta mip info
371     UINT_32 mip0Width,              ///< [in]  mip0 width
372     UINT_32 mip0Height,             ///< [in]  mip0 height
373     UINT_32 mip0Depth,              ///< [in]  mip0 depth
374     UINT_32* pNumMetaBlkX,          ///< [out] number of metablock X in mipchain
375     UINT_32* pNumMetaBlkY,          ///< [out] number of metablock Y in mipchain
376     UINT_32* pNumMetaBlkZ)          ///< [out] number of metablock Z in mipchain
377     const
378 {
379     UINT_32 numMetaBlkX = (mip0Width  + pMetaBlkDim->w - 1) / pMetaBlkDim->w;
380     UINT_32 numMetaBlkY = (mip0Height + pMetaBlkDim->h - 1) / pMetaBlkDim->h;
381     UINT_32 numMetaBlkZ = (mip0Depth  + pMetaBlkDim->d - 1) / pMetaBlkDim->d;
382     UINT_32 tailWidth   = pMetaBlkDim->w;
383     UINT_32 tailHeight  = pMetaBlkDim->h >> 1;
384     UINT_32 tailDepth   = pMetaBlkDim->d;
385     BOOL_32 inTail      = FALSE;
386     AddrMajorMode major = ADDR_MAJOR_MAX_TYPE;
387 
388     if (numMipLevels > 1)
389     {
390         if (dataThick && (numMetaBlkZ > numMetaBlkX) && (numMetaBlkZ > numMetaBlkY))
391         {
392             // Z major
393             major = ADDR_MAJOR_Z;
394         }
395         else if (numMetaBlkX >= numMetaBlkY)
396         {
397             // X major
398             major = ADDR_MAJOR_X;
399         }
400         else
401         {
402             // Y major
403             major = ADDR_MAJOR_Y;
404         }
405 
406         inTail = ((mip0Width <= tailWidth) &&
407                   (mip0Height <= tailHeight) &&
408                   ((dataThick == FALSE) || (mip0Depth <= tailDepth)));
409 
410         if (inTail == FALSE)
411         {
412             UINT_32 orderLimit;
413             UINT_32 *pMipDim;
414             UINT_32 *pOrderDim;
415 
416             if (major == ADDR_MAJOR_Z)
417             {
418                 // Z major
419                 pMipDim = &numMetaBlkY;
420                 pOrderDim = &numMetaBlkZ;
421                 orderLimit = 4;
422             }
423             else if (major == ADDR_MAJOR_X)
424             {
425                 // X major
426                 pMipDim = &numMetaBlkY;
427                 pOrderDim = &numMetaBlkX;
428                 orderLimit = 4;
429             }
430             else
431             {
432                 // Y major
433                 pMipDim = &numMetaBlkX;
434                 pOrderDim = &numMetaBlkY;
435                 orderLimit = 2;
436             }
437 
438             if ((*pMipDim < 3) && (*pOrderDim > orderLimit) && (numMipLevels > 3))
439             {
440                 *pMipDim += 2;
441             }
442             else
443             {
444                 *pMipDim += ((*pMipDim / 2) + (*pMipDim & 1));
445             }
446         }
447     }
448 
449     if (pInfo != NULL)
450     {
451         UINT_32 mipWidth  = mip0Width;
452         UINT_32 mipHeight = mip0Height;
453         UINT_32 mipDepth  = mip0Depth;
454         Dim3d   mipCoord  = {0};
455 
456         for (UINT_32 mip = 0; mip < numMipLevels; mip++)
457         {
458             if (inTail)
459             {
460                 GetMetaMiptailInfo(&pInfo[mip], mipCoord, numMipLevels - mip,
461                                    pMetaBlkDim);
462                 break;
463             }
464             else
465             {
466                 mipWidth  = PowTwoAlign(mipWidth, pMetaBlkDim->w);
467                 mipHeight = PowTwoAlign(mipHeight, pMetaBlkDim->h);
468                 mipDepth  = PowTwoAlign(mipDepth, pMetaBlkDim->d);
469 
470                 pInfo[mip].inMiptail = FALSE;
471                 pInfo[mip].startX = mipCoord.w;
472                 pInfo[mip].startY = mipCoord.h;
473                 pInfo[mip].startZ = mipCoord.d;
474                 pInfo[mip].width  = mipWidth;
475                 pInfo[mip].height = mipHeight;
476                 pInfo[mip].depth  = dataThick ? mipDepth : 1;
477 
478                 if ((mip >= 3) || (mip & 1))
479                 {
480                     switch (major)
481                     {
482                         case ADDR_MAJOR_X:
483                             mipCoord.w += mipWidth;
484                             break;
485                         case ADDR_MAJOR_Y:
486                             mipCoord.h += mipHeight;
487                             break;
488                         case ADDR_MAJOR_Z:
489                             mipCoord.d += mipDepth;
490                             break;
491                         default:
492                             break;
493                     }
494                 }
495                 else
496                 {
497                     switch (major)
498                     {
499                         case ADDR_MAJOR_X:
500                             mipCoord.h += mipHeight;
501                             break;
502                         case ADDR_MAJOR_Y:
503                             mipCoord.w += mipWidth;
504                             break;
505                         case ADDR_MAJOR_Z:
506                             mipCoord.h += mipHeight;
507                             break;
508                         default:
509                             break;
510                     }
511                 }
512 
513                 mipWidth  = Max(mipWidth >> 1, 1u);
514                 mipHeight = Max(mipHeight >> 1, 1u);
515                 mipDepth = Max(mipDepth >> 1, 1u);
516 
517                 inTail = ((mipWidth <= tailWidth) &&
518                           (mipHeight <= tailHeight) &&
519                           ((dataThick == FALSE) || (mipDepth <= tailDepth)));
520             }
521         }
522     }
523 
524     *pNumMetaBlkX = numMetaBlkX;
525     *pNumMetaBlkY = numMetaBlkY;
526     *pNumMetaBlkZ = numMetaBlkZ;
527 }
528 
529 /**
530 ************************************************************************************************************************
531 *   Gfx9Lib::HwlComputeDccInfo
532 *
533 *   @brief
534 *       Interface function to compute DCC key info
535 *
536 *   @return
537 *       ADDR_E_RETURNCODE
538 ************************************************************************************************************************
539 */
HwlComputeDccInfo(const ADDR2_COMPUTE_DCCINFO_INPUT * pIn,ADDR2_COMPUTE_DCCINFO_OUTPUT * pOut) const540 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeDccInfo(
541     const ADDR2_COMPUTE_DCCINFO_INPUT*    pIn,    ///< [in] input structure
542     ADDR2_COMPUTE_DCCINFO_OUTPUT*         pOut    ///< [out] output structure
543     ) const
544 {
545     BOOL_32 dataLinear = IsLinear(pIn->swizzleMode);
546     BOOL_32 metaLinear = pIn->dccKeyFlags.linear;
547     BOOL_32 pipeAligned = pIn->dccKeyFlags.pipeAligned;
548 
549     if (dataLinear)
550     {
551         metaLinear = TRUE;
552     }
553     else if (metaLinear == TRUE)
554     {
555         pipeAligned = FALSE;
556     }
557 
558     UINT_32 numPipeTotal = GetPipeNumForMetaAddressing(pipeAligned, pIn->swizzleMode);
559 
560     if (metaLinear)
561     {
562         // Linear metadata supporting was removed for GFX9! No one can use this feature on GFX9.
563         ADDR_ASSERT_ALWAYS();
564 
565         pOut->dccRamBaseAlign = numPipeTotal * m_pipeInterleaveBytes;
566         pOut->dccRamSize = PowTwoAlign((pIn->dataSurfaceSize / 256), pOut->dccRamBaseAlign);
567     }
568     else
569     {
570         BOOL_32 dataThick = IsThick(pIn->resourceType, pIn->swizzleMode);
571 
572         UINT_32 minMetaBlkSize = dataThick ? 65536 : 4096;
573 
574         UINT_32 numFrags = Max(pIn->numFrags, 1u);
575         UINT_32 numSlices = Max(pIn->numSlices, 1u);
576 
577         minMetaBlkSize /= numFrags;
578 
579         UINT_32 numCompressBlkPerMetaBlk = minMetaBlkSize;
580 
581         UINT_32 numRbTotal = pIn->dccKeyFlags.rbAligned ? m_se * m_rbPerSe : 1;
582 
583         if ((numPipeTotal > 1) || (numRbTotal > 1))
584         {
585             const UINT_32 thinBlkSize = 1 << (m_settings.applyAliasFix ? Max(10u, m_pipeInterleaveLog2) : 10);
586 
587             numCompressBlkPerMetaBlk =
588                 Max(numCompressBlkPerMetaBlk, m_se * m_rbPerSe * (dataThick ? 262144 : thinBlkSize));
589 
590             if (numCompressBlkPerMetaBlk > 65536 * pIn->bpp)
591             {
592                 numCompressBlkPerMetaBlk = 65536 * pIn->bpp;
593             }
594         }
595 
596         Dim3d compressBlkDim = GetDccCompressBlk(pIn->resourceType, pIn->swizzleMode, pIn->bpp);
597         Dim3d metaBlkDim = compressBlkDim;
598 
599         for (UINT_32 index = 1; index < numCompressBlkPerMetaBlk; index <<= 1)
600         {
601             if ((metaBlkDim.h < metaBlkDim.w) ||
602                 ((pIn->numMipLevels > 1) && (metaBlkDim.h == metaBlkDim.w)))
603             {
604                 if ((dataThick == FALSE) || (metaBlkDim.h <= metaBlkDim.d))
605                 {
606                     metaBlkDim.h <<= 1;
607                 }
608                 else
609                 {
610                     metaBlkDim.d <<= 1;
611                 }
612             }
613             else
614             {
615                 if ((dataThick == FALSE) || (metaBlkDim.w <= metaBlkDim.d))
616                 {
617                     metaBlkDim.w <<= 1;
618                 }
619                 else
620                 {
621                     metaBlkDim.d <<= 1;
622                 }
623             }
624         }
625 
626         UINT_32 numMetaBlkX;
627         UINT_32 numMetaBlkY;
628         UINT_32 numMetaBlkZ;
629 
630         GetMetaMipInfo(pIn->numMipLevels, &metaBlkDim, dataThick, pOut->pMipInfo,
631                        pIn->unalignedWidth, pIn->unalignedHeight, numSlices,
632                        &numMetaBlkX, &numMetaBlkY, &numMetaBlkZ);
633 
634         UINT_32 sizeAlign = numPipeTotal * numRbTotal * m_pipeInterleaveBytes;
635 
636         if (numFrags > m_maxCompFrag)
637         {
638             sizeAlign *= (numFrags / m_maxCompFrag);
639         }
640 
641         pOut->dccRamSize = numMetaBlkX * numMetaBlkY * numMetaBlkZ *
642                            numCompressBlkPerMetaBlk * numFrags;
643         pOut->dccRamSize = PowTwoAlign(pOut->dccRamSize, sizeAlign);
644         pOut->dccRamBaseAlign = Max(numCompressBlkPerMetaBlk, sizeAlign);
645 
646         if (m_settings.metaBaseAlignFix)
647         {
648             pOut->dccRamBaseAlign = Max(pOut->dccRamBaseAlign, GetBlockSize(pIn->swizzleMode));
649         }
650 
651         pOut->pitch = numMetaBlkX * metaBlkDim.w;
652         pOut->height = numMetaBlkY * metaBlkDim.h;
653         pOut->depth = numMetaBlkZ * metaBlkDim.d;
654 
655         pOut->compressBlkWidth = compressBlkDim.w;
656         pOut->compressBlkHeight = compressBlkDim.h;
657         pOut->compressBlkDepth = compressBlkDim.d;
658 
659         pOut->metaBlkWidth = metaBlkDim.w;
660         pOut->metaBlkHeight = metaBlkDim.h;
661         pOut->metaBlkDepth = metaBlkDim.d;
662 
663         pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY;
664         pOut->fastClearSizePerSlice =
665             pOut->metaBlkNumPerSlice * numCompressBlkPerMetaBlk * Min(numFrags, m_maxCompFrag);
666     }
667 
668     return ADDR_OK;
669 }
670 
671 /**
672 ************************************************************************************************************************
673 *   Gfx9Lib::HwlGetMaxAlignments
674 *
675 *   @brief
676 *       Gets maximum alignments
677 *   @return
678 *       ADDR_E_RETURNCODE
679 ************************************************************************************************************************
680 */
HwlGetMaxAlignments(ADDR_GET_MAX_ALIGNMENTS_OUTPUT * pOut) const681 ADDR_E_RETURNCODE Gfx9Lib::HwlGetMaxAlignments(
682     ADDR_GET_MAX_ALIGNMENTS_OUTPUT* pOut    ///< [out] output structure
683     ) const
684 {
685     pOut->baseAlign = HwlComputeSurfaceBaseAlign(ADDR_SW_64KB);
686 
687     return ADDR_OK;
688 }
689 
690 /**
691 ************************************************************************************************************************
692 *   Gfx9Lib::HwlComputeCmaskAddrFromCoord
693 *
694 *   @brief
695 *       Interface function stub of AddrComputeCmaskAddrFromCoord
696 *
697 *   @return
698 *       ADDR_E_RETURNCODE
699 ************************************************************************************************************************
700 */
HwlComputeCmaskAddrFromCoord(const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT * pOut)701 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeCmaskAddrFromCoord(
702     const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT*   pIn,    ///< [in] input structure
703     ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT*        pOut)   ///< [out] output structure
704 {
705     ADDR2_COMPUTE_CMASK_INFO_INPUT input = {0};
706     input.size            = sizeof(input);
707     input.cMaskFlags      = pIn->cMaskFlags;
708     input.colorFlags      = pIn->colorFlags;
709     input.unalignedWidth  = Max(pIn->unalignedWidth, 1u);
710     input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
711     input.numSlices       = Max(pIn->numSlices, 1u);
712     input.swizzleMode     = pIn->swizzleMode;
713     input.resourceType    = pIn->resourceType;
714 
715     ADDR2_COMPUTE_CMASK_INFO_OUTPUT output = {0};
716     output.size = sizeof(output);
717 
718     ADDR_E_RETURNCODE returnCode = ComputeCmaskInfo(&input, &output);
719 
720     if (returnCode == ADDR_OK)
721     {
722         UINT_32 fmaskBpp              = GetFmaskBpp(pIn->numSamples, pIn->numFrags);
723         UINT_32 fmaskElementBytesLog2 = Log2(fmaskBpp >> 3);
724         UINT_32 metaBlkWidthLog2      = Log2(output.metaBlkWidth);
725         UINT_32 metaBlkHeightLog2     = Log2(output.metaBlkHeight);
726 
727         const CoordEq* pMetaEq = GetMetaEquation({0, fmaskElementBytesLog2, 0, pIn->cMaskFlags,
728                                                   Gfx9DataFmask, pIn->swizzleMode, pIn->resourceType,
729                                                   metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0});
730 
731         UINT_32 xb = pIn->x / output.metaBlkWidth;
732         UINT_32 yb = pIn->y / output.metaBlkHeight;
733         UINT_32 zb = pIn->slice;
734 
735         UINT_32 pitchInBlock     = output.pitch / output.metaBlkWidth;
736         UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
737         UINT_32 blockIndex       = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
738 
739         UINT_64 address = pMetaEq->solve(pIn->x, pIn->y, pIn->slice, 0, blockIndex);
740 
741         pOut->addr = address >> 1;
742         pOut->bitPosition = static_cast<UINT_32>((address & 1) << 2);
743 
744 
745         UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->cMaskFlags.pipeAligned,
746                                                            pIn->swizzleMode);
747 
748         UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
749 
750         pOut->addr ^= (pipeXor << m_pipeInterleaveLog2);
751     }
752 
753     return returnCode;
754 }
755 
756 /**
757 ************************************************************************************************************************
758 *   Gfx9Lib::HwlComputeHtileAddrFromCoord
759 *
760 *   @brief
761 *       Interface function stub of AddrComputeHtileAddrFromCoord
762 *
763 *   @return
764 *       ADDR_E_RETURNCODE
765 ************************************************************************************************************************
766 */
HwlComputeHtileAddrFromCoord(const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT * pOut)767 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileAddrFromCoord(
768     const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT*   pIn,    ///< [in] input structure
769     ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT*        pOut)   ///< [out] output structure
770 {
771     ADDR_E_RETURNCODE returnCode = ADDR_OK;
772 
773     if (pIn->numMipLevels > 1)
774     {
775         returnCode = ADDR_NOTIMPLEMENTED;
776     }
777     else
778     {
779         ADDR2_COMPUTE_HTILE_INFO_INPUT input = {0};
780         input.size            = sizeof(input);
781         input.hTileFlags      = pIn->hTileFlags;
782         input.depthFlags      = pIn->depthflags;
783         input.swizzleMode     = pIn->swizzleMode;
784         input.unalignedWidth  = Max(pIn->unalignedWidth, 1u);
785         input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
786         input.numSlices       = Max(pIn->numSlices, 1u);
787         input.numMipLevels    = Max(pIn->numMipLevels, 1u);
788 
789         ADDR2_COMPUTE_HTILE_INFO_OUTPUT output = {0};
790         output.size = sizeof(output);
791 
792         returnCode = ComputeHtileInfo(&input, &output);
793 
794         if (returnCode == ADDR_OK)
795         {
796             UINT_32 elementBytesLog2  = Log2(pIn->bpp >> 3);
797             UINT_32 metaBlkWidthLog2  = Log2(output.metaBlkWidth);
798             UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight);
799             UINT_32 numSamplesLog2    = Log2(pIn->numSamples);
800 
801             const CoordEq* pMetaEq = GetMetaEquation({0, elementBytesLog2, numSamplesLog2, pIn->hTileFlags,
802                                                       Gfx9DataDepthStencil, pIn->swizzleMode, ADDR_RSRC_TEX_2D,
803                                                       metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0});
804 
805             UINT_32 xb = pIn->x / output.metaBlkWidth;
806             UINT_32 yb = pIn->y / output.metaBlkHeight;
807             UINT_32 zb = pIn->slice;
808 
809             UINT_32 pitchInBlock     = output.pitch / output.metaBlkWidth;
810             UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
811             UINT_32 blockIndex       = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
812 
813             UINT_64 address = pMetaEq->solve(pIn->x, pIn->y, pIn->slice, 0, blockIndex);
814 
815             pOut->addr = address >> 1;
816 
817             UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->hTileFlags.pipeAligned,
818                                                                pIn->swizzleMode);
819 
820             UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
821 
822             pOut->addr ^= (pipeXor << m_pipeInterleaveLog2);
823         }
824     }
825 
826     return returnCode;
827 }
828 
829 /**
830 ************************************************************************************************************************
831 *   Gfx9Lib::HwlComputeHtileCoordFromAddr
832 *
833 *   @brief
834 *       Interface function stub of AddrComputeHtileCoordFromAddr
835 *
836 *   @return
837 *       ADDR_E_RETURNCODE
838 ************************************************************************************************************************
839 */
HwlComputeHtileCoordFromAddr(const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT * pIn,ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT * pOut)840 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileCoordFromAddr(
841     const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT*   pIn,    ///< [in] input structure
842     ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT*        pOut)   ///< [out] output structure
843 {
844     ADDR_E_RETURNCODE returnCode = ADDR_OK;
845 
846     if (pIn->numMipLevels > 1)
847     {
848         returnCode = ADDR_NOTIMPLEMENTED;
849     }
850     else
851     {
852         ADDR2_COMPUTE_HTILE_INFO_INPUT input = {0};
853         input.size            = sizeof(input);
854         input.hTileFlags      = pIn->hTileFlags;
855         input.swizzleMode     = pIn->swizzleMode;
856         input.unalignedWidth  = Max(pIn->unalignedWidth, 1u);
857         input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
858         input.numSlices       = Max(pIn->numSlices, 1u);
859         input.numMipLevels    = Max(pIn->numMipLevels, 1u);
860 
861         ADDR2_COMPUTE_HTILE_INFO_OUTPUT output = {0};
862         output.size = sizeof(output);
863 
864         returnCode = ComputeHtileInfo(&input, &output);
865 
866         if (returnCode == ADDR_OK)
867         {
868             UINT_32 elementBytesLog2  = Log2(pIn->bpp >> 3);
869             UINT_32 metaBlkWidthLog2  = Log2(output.metaBlkWidth);
870             UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight);
871             UINT_32 numSamplesLog2    = Log2(pIn->numSamples);
872 
873             const CoordEq* pMetaEq = GetMetaEquation({0, elementBytesLog2, numSamplesLog2, pIn->hTileFlags,
874                                                       Gfx9DataDepthStencil, pIn->swizzleMode, ADDR_RSRC_TEX_2D,
875                                                       metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0});
876 
877             UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->hTileFlags.pipeAligned,
878                                                                pIn->swizzleMode);
879 
880             UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
881 
882             UINT_64 nibbleAddress = (pIn->addr ^ (pipeXor << m_pipeInterleaveLog2)) << 1;
883 
884             UINT_32 pitchInBlock     = output.pitch / output.metaBlkWidth;
885             UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
886 
887             UINT_32 x, y, z, s, m;
888             pMetaEq->solveAddr(nibbleAddress, sliceSizeInBlock, x, y, z, s, m);
889 
890             pOut->slice = m / sliceSizeInBlock;
891             pOut->y     = ((m % sliceSizeInBlock) / pitchInBlock) * output.metaBlkHeight + y;
892             pOut->x     = (m % pitchInBlock) * output.metaBlkWidth + x;
893         }
894     }
895 
896     return returnCode;
897 }
898 
899 /**
900 ************************************************************************************************************************
901 *   Gfx9Lib::HwlComputeDccAddrFromCoord
902 *
903 *   @brief
904 *       Interface function stub of AddrComputeDccAddrFromCoord
905 *
906 *   @return
907 *       ADDR_E_RETURNCODE
908 ************************************************************************************************************************
909 */
HwlComputeDccAddrFromCoord(const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT * pOut)910 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeDccAddrFromCoord(
911     const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT*  pIn,
912     ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT* pOut)
913 {
914     ADDR_E_RETURNCODE returnCode = ADDR_OK;
915 
916     if ((pIn->numMipLevels > 1) || (pIn->mipId > 1) || pIn->dccKeyFlags.linear)
917     {
918         returnCode = ADDR_NOTIMPLEMENTED;
919     }
920     else
921     {
922         ADDR2_COMPUTE_DCCINFO_INPUT input = {0};
923         input.size            = sizeof(input);
924         input.dccKeyFlags     = pIn->dccKeyFlags;
925         input.colorFlags      = pIn->colorFlags;
926         input.swizzleMode     = pIn->swizzleMode;
927         input.resourceType    = pIn->resourceType;
928         input.bpp             = pIn->bpp;
929         input.unalignedWidth  = Max(pIn->unalignedWidth, 1u);
930         input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
931         input.numSlices       = Max(pIn->numSlices, 1u);
932         input.numFrags        = Max(pIn->numFrags, 1u);
933         input.numMipLevels    = Max(pIn->numMipLevels, 1u);
934 
935         ADDR2_COMPUTE_DCCINFO_OUTPUT output = {0};
936         output.size = sizeof(output);
937 
938         returnCode = ComputeDccInfo(&input, &output);
939 
940         if (returnCode == ADDR_OK)
941         {
942             UINT_32 elementBytesLog2  = Log2(pIn->bpp >> 3);
943             UINT_32 numSamplesLog2    = Log2(pIn->numFrags);
944             UINT_32 metaBlkWidthLog2  = Log2(output.metaBlkWidth);
945             UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight);
946             UINT_32 metaBlkDepthLog2  = Log2(output.metaBlkDepth);
947             UINT_32 compBlkWidthLog2  = Log2(output.compressBlkWidth);
948             UINT_32 compBlkHeightLog2 = Log2(output.compressBlkHeight);
949             UINT_32 compBlkDepthLog2  = Log2(output.compressBlkDepth);
950 
951             const CoordEq* pMetaEq = GetMetaEquation({pIn->mipId, elementBytesLog2, numSamplesLog2, pIn->dccKeyFlags,
952                                                       Gfx9DataColor, pIn->swizzleMode, pIn->resourceType,
953                                                       metaBlkWidthLog2, metaBlkHeightLog2, metaBlkDepthLog2,
954                                                       compBlkWidthLog2, compBlkHeightLog2, compBlkDepthLog2});
955 
956             UINT_32 xb = pIn->x / output.metaBlkWidth;
957             UINT_32 yb = pIn->y / output.metaBlkHeight;
958             UINT_32 zb = pIn->slice / output.metaBlkDepth;
959 
960             UINT_32 pitchInBlock     = output.pitch / output.metaBlkWidth;
961             UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
962             UINT_32 blockIndex       = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
963 
964             UINT_64 address = pMetaEq->solve(pIn->x, pIn->y, pIn->slice, pIn->sample, blockIndex);
965 
966             pOut->addr = address >> 1;
967 
968             UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->dccKeyFlags.pipeAligned,
969                                                                pIn->swizzleMode);
970 
971             UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
972 
973             pOut->addr ^= (pipeXor << m_pipeInterleaveLog2);
974         }
975     }
976 
977     return returnCode;
978 }
979 
980 /**
981 ************************************************************************************************************************
982 *   Gfx9Lib::HwlInitGlobalParams
983 *
984 *   @brief
985 *       Initializes global parameters
986 *
987 *   @return
988 *       TRUE if all settings are valid
989 *
990 ************************************************************************************************************************
991 */
HwlInitGlobalParams(const ADDR_CREATE_INPUT * pCreateIn)992 BOOL_32 Gfx9Lib::HwlInitGlobalParams(
993     const ADDR_CREATE_INPUT* pCreateIn) ///< [in] create input
994 {
995     BOOL_32 valid = TRUE;
996 
997     if (m_settings.isArcticIsland)
998     {
999         GB_ADDR_CONFIG gbAddrConfig;
1000 
1001         gbAddrConfig.u32All = pCreateIn->regValue.gbAddrConfig;
1002 
1003         // These values are copied from CModel code
1004         switch (gbAddrConfig.bits.NUM_PIPES)
1005         {
1006             case ADDR_CONFIG_1_PIPE:
1007                 m_pipes = 1;
1008                 m_pipesLog2 = 0;
1009                 break;
1010             case ADDR_CONFIG_2_PIPE:
1011                 m_pipes = 2;
1012                 m_pipesLog2 = 1;
1013                 break;
1014             case ADDR_CONFIG_4_PIPE:
1015                 m_pipes = 4;
1016                 m_pipesLog2 = 2;
1017                 break;
1018             case ADDR_CONFIG_8_PIPE:
1019                 m_pipes = 8;
1020                 m_pipesLog2 = 3;
1021                 break;
1022             case ADDR_CONFIG_16_PIPE:
1023                 m_pipes = 16;
1024                 m_pipesLog2 = 4;
1025                 break;
1026             case ADDR_CONFIG_32_PIPE:
1027                 m_pipes = 32;
1028                 m_pipesLog2 = 5;
1029                 break;
1030             default:
1031                 ADDR_ASSERT_ALWAYS();
1032                 break;
1033         }
1034 
1035         switch (gbAddrConfig.bits.PIPE_INTERLEAVE_SIZE)
1036         {
1037             case ADDR_CONFIG_PIPE_INTERLEAVE_256B:
1038                 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_256B;
1039                 m_pipeInterleaveLog2 = 8;
1040                 break;
1041             case ADDR_CONFIG_PIPE_INTERLEAVE_512B:
1042                 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_512B;
1043                 m_pipeInterleaveLog2 = 9;
1044                 break;
1045             case ADDR_CONFIG_PIPE_INTERLEAVE_1KB:
1046                 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_1KB;
1047                 m_pipeInterleaveLog2 = 10;
1048                 break;
1049             case ADDR_CONFIG_PIPE_INTERLEAVE_2KB:
1050                 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_2KB;
1051                 m_pipeInterleaveLog2 = 11;
1052                 break;
1053             default:
1054                 ADDR_ASSERT_ALWAYS();
1055                 break;
1056         }
1057 
1058         switch (gbAddrConfig.bits.NUM_BANKS)
1059         {
1060             case ADDR_CONFIG_1_BANK:
1061                 m_banks = 1;
1062                 m_banksLog2 = 0;
1063                 break;
1064             case ADDR_CONFIG_2_BANK:
1065                 m_banks = 2;
1066                 m_banksLog2 = 1;
1067                 break;
1068             case ADDR_CONFIG_4_BANK:
1069                 m_banks = 4;
1070                 m_banksLog2 = 2;
1071                 break;
1072             case ADDR_CONFIG_8_BANK:
1073                 m_banks = 8;
1074                 m_banksLog2 = 3;
1075                 break;
1076             case ADDR_CONFIG_16_BANK:
1077                 m_banks = 16;
1078                 m_banksLog2 = 4;
1079                 break;
1080             default:
1081                 ADDR_ASSERT_ALWAYS();
1082                 break;
1083         }
1084 
1085         switch (gbAddrConfig.bits.NUM_SHADER_ENGINES)
1086         {
1087             case ADDR_CONFIG_1_SHADER_ENGINE:
1088                 m_se = 1;
1089                 m_seLog2 = 0;
1090                 break;
1091             case ADDR_CONFIG_2_SHADER_ENGINE:
1092                 m_se = 2;
1093                 m_seLog2 = 1;
1094                 break;
1095             case ADDR_CONFIG_4_SHADER_ENGINE:
1096                 m_se = 4;
1097                 m_seLog2 = 2;
1098                 break;
1099             case ADDR_CONFIG_8_SHADER_ENGINE:
1100                 m_se = 8;
1101                 m_seLog2 = 3;
1102                 break;
1103             default:
1104                 ADDR_ASSERT_ALWAYS();
1105                 break;
1106         }
1107 
1108         switch (gbAddrConfig.bits.NUM_RB_PER_SE)
1109         {
1110             case ADDR_CONFIG_1_RB_PER_SHADER_ENGINE:
1111                 m_rbPerSe = 1;
1112                 m_rbPerSeLog2 = 0;
1113                 break;
1114             case ADDR_CONFIG_2_RB_PER_SHADER_ENGINE:
1115                 m_rbPerSe = 2;
1116                 m_rbPerSeLog2 = 1;
1117                 break;
1118             case ADDR_CONFIG_4_RB_PER_SHADER_ENGINE:
1119                 m_rbPerSe = 4;
1120                 m_rbPerSeLog2 = 2;
1121                 break;
1122             default:
1123                 ADDR_ASSERT_ALWAYS();
1124                 break;
1125         }
1126 
1127         switch (gbAddrConfig.bits.MAX_COMPRESSED_FRAGS)
1128         {
1129             case ADDR_CONFIG_1_MAX_COMPRESSED_FRAGMENTS:
1130                 m_maxCompFrag = 1;
1131                 m_maxCompFragLog2 = 0;
1132                 break;
1133             case ADDR_CONFIG_2_MAX_COMPRESSED_FRAGMENTS:
1134                 m_maxCompFrag = 2;
1135                 m_maxCompFragLog2 = 1;
1136                 break;
1137             case ADDR_CONFIG_4_MAX_COMPRESSED_FRAGMENTS:
1138                 m_maxCompFrag = 4;
1139                 m_maxCompFragLog2 = 2;
1140                 break;
1141             case ADDR_CONFIG_8_MAX_COMPRESSED_FRAGMENTS:
1142                 m_maxCompFrag = 8;
1143                 m_maxCompFragLog2 = 3;
1144                 break;
1145             default:
1146                 ADDR_ASSERT_ALWAYS();
1147                 break;
1148         }
1149 
1150         m_blockVarSizeLog2 = pCreateIn->regValue.blockVarSizeLog2;
1151         ADDR_ASSERT((m_blockVarSizeLog2 == 0) ||
1152                     ((m_blockVarSizeLog2 >= 17u) && (m_blockVarSizeLog2 <= 20u)));
1153         m_blockVarSizeLog2 = Min(Max(17u, m_blockVarSizeLog2), 20u);
1154     }
1155     else
1156     {
1157         valid = FALSE;
1158         ADDR_NOT_IMPLEMENTED();
1159     }
1160 
1161     if (valid)
1162     {
1163         InitEquationTable();
1164     }
1165 
1166     return valid;
1167 }
1168 
1169 /**
1170 ************************************************************************************************************************
1171 *   Gfx9Lib::HwlConvertChipFamily
1172 *
1173 *   @brief
1174 *       Convert familyID defined in atiid.h to ChipFamily and set m_chipFamily/m_chipRevision
1175 *   @return
1176 *       ChipFamily
1177 ************************************************************************************************************************
1178 */
HwlConvertChipFamily(UINT_32 uChipFamily,UINT_32 uChipRevision)1179 ChipFamily Gfx9Lib::HwlConvertChipFamily(
1180     UINT_32 uChipFamily,        ///< [in] chip family defined in atiih.h
1181     UINT_32 uChipRevision)      ///< [in] chip revision defined in "asic_family"_id.h
1182 {
1183     ChipFamily family = ADDR_CHIP_FAMILY_AI;
1184 
1185     switch (uChipFamily)
1186     {
1187         case FAMILY_AI:
1188             m_settings.isArcticIsland = 1;
1189             m_settings.isVega10    = ASICREV_IS_VEGA10_P(uChipRevision);
1190 
1191             m_settings.isDce12 = 1;
1192 
1193             if (m_settings.isVega10 == 0)
1194             {
1195                 m_settings.htileAlignFix = 1;
1196                 m_settings.applyAliasFix = 1;
1197             }
1198 
1199             m_settings.metaBaseAlignFix = 1;
1200 
1201             m_settings.depthPipeXorDisable = 1;
1202             break;
1203         case FAMILY_RV:
1204             m_settings.isArcticIsland = 1;
1205             m_settings.isRaven        = ASICREV_IS_RAVEN(uChipRevision);
1206 
1207             if (m_settings.isRaven)
1208             {
1209                 m_settings.isDcn1   = 1;
1210             }
1211 
1212             m_settings.metaBaseAlignFix = 1;
1213 
1214             if (ASICREV_IS_RAVEN(uChipRevision))
1215             {
1216                 m_settings.depthPipeXorDisable = 1;
1217             }
1218             break;
1219 
1220         default:
1221             ADDR_ASSERT(!"This should be a Fusion");
1222             break;
1223     }
1224 
1225     return family;
1226 }
1227 
1228 /**
1229 ************************************************************************************************************************
1230 *   Gfx9Lib::InitRbEquation
1231 *
1232 *   @brief
1233 *       Init RB equation
1234 *   @return
1235 *       N/A
1236 ************************************************************************************************************************
1237 */
GetRbEquation(CoordEq * pRbEq,UINT_32 numRbPerSeLog2,UINT_32 numSeLog2) const1238 VOID Gfx9Lib::GetRbEquation(
1239     CoordEq* pRbEq,             ///< [out] rb equation
1240     UINT_32  numRbPerSeLog2,    ///< [in] number of rb per shader engine
1241     UINT_32  numSeLog2)         ///< [in] number of shader engine
1242     const
1243 {
1244     // RB's are distributed on 16x16, except when we have 1 rb per se, in which case its 32x32
1245     UINT_32 rbRegion = (numRbPerSeLog2 == 0) ? 5 : 4;
1246     Coordinate cx('x', rbRegion);
1247     Coordinate cy('y', rbRegion);
1248 
1249     UINT_32 start = 0;
1250     UINT_32 numRbTotalLog2 = numRbPerSeLog2 + numSeLog2;
1251 
1252     // Clear the rb equation
1253     pRbEq->resize(0);
1254     pRbEq->resize(numRbTotalLog2);
1255 
1256     if ((numSeLog2 > 0) && (numRbPerSeLog2 == 1))
1257     {
1258         // Special case when more than 1 SE, and 2 RB per SE
1259         (*pRbEq)[0].add(cx);
1260         (*pRbEq)[0].add(cy);
1261         cx++;
1262         cy++;
1263 
1264         if (m_settings.applyAliasFix == false)
1265         {
1266             (*pRbEq)[0].add(cy);
1267         }
1268 
1269         (*pRbEq)[0].add(cy);
1270         start++;
1271     }
1272 
1273     UINT_32 numBits = 2 * (numRbTotalLog2 - start);
1274 
1275     for (UINT_32 i = 0; i < numBits; i++)
1276     {
1277         UINT_32 idx =
1278             start + (((start + i) >= numRbTotalLog2) ? (2 * (numRbTotalLog2 - start) - i - 1) : i);
1279 
1280         if ((i % 2) == 1)
1281         {
1282             (*pRbEq)[idx].add(cx);
1283             cx++;
1284         }
1285         else
1286         {
1287             (*pRbEq)[idx].add(cy);
1288             cy++;
1289         }
1290     }
1291 }
1292 
1293 /**
1294 ************************************************************************************************************************
1295 *   Gfx9Lib::GetDataEquation
1296 *
1297 *   @brief
1298 *       Get data equation for fmask and Z
1299 *   @return
1300 *       N/A
1301 ************************************************************************************************************************
1302 */
GetDataEquation(CoordEq * pDataEq,Gfx9DataType dataSurfaceType,AddrSwizzleMode swizzleMode,AddrResourceType resourceType,UINT_32 elementBytesLog2,UINT_32 numSamplesLog2) const1303 VOID Gfx9Lib::GetDataEquation(
1304     CoordEq* pDataEq,               ///< [out] data surface equation
1305     Gfx9DataType dataSurfaceType,   ///< [in] data surface type
1306     AddrSwizzleMode swizzleMode,    ///< [in] data surface swizzle mode
1307     AddrResourceType resourceType,  ///< [in] data surface resource type
1308     UINT_32 elementBytesLog2,       ///< [in] data surface element bytes
1309     UINT_32 numSamplesLog2)         ///< [in] data surface sample count
1310     const
1311 {
1312     Coordinate cx('x', 0);
1313     Coordinate cy('y', 0);
1314     Coordinate cz('z', 0);
1315     Coordinate cs('s', 0);
1316 
1317     // Clear the equation
1318     pDataEq->resize(0);
1319     pDataEq->resize(27);
1320 
1321     if (dataSurfaceType == Gfx9DataColor)
1322     {
1323         if (IsLinear(swizzleMode))
1324         {
1325             Coordinate cm('m', 0);
1326 
1327             pDataEq->resize(49);
1328 
1329             for (UINT_32 i = 0; i < 49; i++)
1330             {
1331                 (*pDataEq)[i].add(cm);
1332                 cm++;
1333             }
1334         }
1335         else if (IsThick(resourceType, swizzleMode))
1336         {
1337             // Color 3d_S and 3d_Z modes, 3d_D is same as color 2d
1338             UINT_32 i;
1339             if (IsStandardSwizzle(resourceType, swizzleMode))
1340             {
1341                 // Standard 3d swizzle
1342                 // Fill in bottom x bits
1343                 for (i = elementBytesLog2; i < 4; i++)
1344                 {
1345                     (*pDataEq)[i].add(cx);
1346                     cx++;
1347                 }
1348                 // Fill in 2 bits of y and then z
1349                 for (i = 4; i < 6; i++)
1350                 {
1351                     (*pDataEq)[i].add(cy);
1352                     cy++;
1353                 }
1354                 for (i = 6; i < 8; i++)
1355                 {
1356                     (*pDataEq)[i].add(cz);
1357                     cz++;
1358                 }
1359                 if (elementBytesLog2 < 2)
1360                 {
1361                     // fill in z & y bit
1362                     (*pDataEq)[8].add(cz);
1363                     (*pDataEq)[9].add(cy);
1364                     cz++;
1365                     cy++;
1366                 }
1367                 else if (elementBytesLog2 == 2)
1368                 {
1369                     // fill in y and x bit
1370                     (*pDataEq)[8].add(cy);
1371                     (*pDataEq)[9].add(cx);
1372                     cy++;
1373                     cx++;
1374                 }
1375                 else
1376                 {
1377                     // fill in 2 x bits
1378                     (*pDataEq)[8].add(cx);
1379                     cx++;
1380                     (*pDataEq)[9].add(cx);
1381                     cx++;
1382                 }
1383             }
1384             else
1385             {
1386                 // Z 3d swizzle
1387                 UINT_32 m2dEnd = (elementBytesLog2 ==0) ? 3 : ((elementBytesLog2 < 4) ? 4 : 5);
1388                 UINT_32 numZs = (elementBytesLog2 == 0 || elementBytesLog2 == 4) ?
1389                                 2 : ((elementBytesLog2 == 1) ? 3 : 1);
1390                 pDataEq->mort2d(cx, cy, elementBytesLog2, m2dEnd);
1391                 for (i = m2dEnd + 1; i <= m2dEnd + numZs; i++)
1392                 {
1393                     (*pDataEq)[i].add(cz);
1394                     cz++;
1395                 }
1396                 if ((elementBytesLog2 == 0) || (elementBytesLog2 == 3))
1397                 {
1398                     // add an x and z
1399                     (*pDataEq)[6].add(cx);
1400                     (*pDataEq)[7].add(cz);
1401                     cx++;
1402                     cz++;
1403                 }
1404                 else if (elementBytesLog2 == 2)
1405                 {
1406                     // add a y and z
1407                     (*pDataEq)[6].add(cy);
1408                     (*pDataEq)[7].add(cz);
1409                     cy++;
1410                     cz++;
1411                 }
1412                 // add y and x
1413                 (*pDataEq)[8].add(cy);
1414                 (*pDataEq)[9].add(cx);
1415                 cy++;
1416                 cx++;
1417             }
1418             // Fill in bit 10 and up
1419             pDataEq->mort3d( cz, cy, cx, 10 );
1420         }
1421         else if (IsThin(resourceType, swizzleMode))
1422         {
1423             UINT_32 blockSizeLog2 = GetBlockSizeLog2(swizzleMode);
1424             // Color 2D
1425             UINT_32 microYBits = (8 - elementBytesLog2) / 2;
1426             UINT_32 tileSplitStart = blockSizeLog2 - numSamplesLog2;
1427             UINT_32 i;
1428             // Fill in bottom x bits
1429             for (i = elementBytesLog2; i < 4; i++)
1430             {
1431                 (*pDataEq)[i].add(cx);
1432                 cx++;
1433             }
1434             // Fill in bottom y bits
1435             for (i = 4; i < 4 + microYBits; i++)
1436             {
1437                 (*pDataEq)[i].add(cy);
1438                 cy++;
1439             }
1440             // Fill in last of the micro_x bits
1441             for (i = 4 + microYBits; i < 8; i++)
1442             {
1443                 (*pDataEq)[i].add(cx);
1444                 cx++;
1445             }
1446             // Fill in x/y bits below sample split
1447             pDataEq->mort2d(cy, cx, 8, tileSplitStart - 1);
1448             // Fill in sample bits
1449             for (i = 0; i < numSamplesLog2; i++)
1450             {
1451                 cs.set('s', i);
1452                 (*pDataEq)[tileSplitStart + i].add(cs);
1453             }
1454             // Fill in x/y bits above sample split
1455             if ((numSamplesLog2 & 1) ^ (blockSizeLog2 & 1))
1456             {
1457                 pDataEq->mort2d(cx, cy, blockSizeLog2);
1458             }
1459             else
1460             {
1461                 pDataEq->mort2d(cy, cx, blockSizeLog2);
1462             }
1463         }
1464         else
1465         {
1466             ADDR_ASSERT_ALWAYS();
1467         }
1468     }
1469     else
1470     {
1471         // Fmask or depth
1472         UINT_32 sampleStart = elementBytesLog2;
1473         UINT_32 pixelStart = elementBytesLog2 + numSamplesLog2;
1474         UINT_32 ymajStart = 6 + numSamplesLog2;
1475 
1476         for (UINT_32 s = 0; s < numSamplesLog2; s++)
1477         {
1478             cs.set('s', s);
1479             (*pDataEq)[sampleStart + s].add(cs);
1480         }
1481 
1482         // Put in the x-major order pixel bits
1483         pDataEq->mort2d(cx, cy, pixelStart, ymajStart - 1);
1484         // Put in the y-major order pixel bits
1485         pDataEq->mort2d(cy, cx, ymajStart);
1486     }
1487 }
1488 
1489 /**
1490 ************************************************************************************************************************
1491 *   Gfx9Lib::GetPipeEquation
1492 *
1493 *   @brief
1494 *       Get pipe equation
1495 *   @return
1496 *       N/A
1497 ************************************************************************************************************************
1498 */
GetPipeEquation(CoordEq * pPipeEq,CoordEq * pDataEq,UINT_32 pipeInterleaveLog2,UINT_32 numPipeLog2,UINT_32 numSamplesLog2,Gfx9DataType dataSurfaceType,AddrSwizzleMode swizzleMode,AddrResourceType resourceType) const1499 VOID Gfx9Lib::GetPipeEquation(
1500     CoordEq*         pPipeEq,            ///< [out] pipe equation
1501     CoordEq*         pDataEq,            ///< [in] data equation
1502     UINT_32          pipeInterleaveLog2, ///< [in] pipe interleave
1503     UINT_32          numPipeLog2,        ///< [in] number of pipes
1504     UINT_32          numSamplesLog2,     ///< [in] data surface sample count
1505     Gfx9DataType     dataSurfaceType,    ///< [in] data surface type
1506     AddrSwizzleMode  swizzleMode,        ///< [in] data surface swizzle mode
1507     AddrResourceType resourceType        ///< [in] data surface resource type
1508     ) const
1509 {
1510     UINT_32 blockSizeLog2 = GetBlockSizeLog2(swizzleMode);
1511     CoordEq dataEq;
1512 
1513     pDataEq->copy(dataEq);
1514 
1515     if (dataSurfaceType == Gfx9DataColor)
1516     {
1517         INT_32 shift = static_cast<INT_32>(numSamplesLog2);
1518         dataEq.shift(-shift, blockSizeLog2 - numSamplesLog2);
1519     }
1520 
1521     dataEq.copy(*pPipeEq, pipeInterleaveLog2, numPipeLog2);
1522 
1523     // This section should only apply to z/stencil, maybe fmask
1524     // If the pipe bit is below the comp block size,
1525     // then keep moving up the address until we find a bit that is above
1526     UINT_32 pipeStart = 0;
1527 
1528     if (dataSurfaceType != Gfx9DataColor)
1529     {
1530         Coordinate tileMin('x', 3);
1531 
1532         while (dataEq[pipeInterleaveLog2 + pipeStart][0] < tileMin)
1533         {
1534             pipeStart++;
1535         }
1536 
1537         // if pipe is 0, then the first pipe bit is above the comp block size,
1538         // so we don't need to do anything
1539         // Note, this if condition is not necessary, since if we execute the loop when pipe==0,
1540         // we will get the same pipe equation
1541         if (pipeStart != 0)
1542         {
1543             for (UINT_32 i = 0; i < numPipeLog2; i++)
1544             {
1545                 // Copy the jth bit above pipe interleave to the current pipe equation bit
1546                 dataEq[pipeInterleaveLog2 + pipeStart + i].copyto((*pPipeEq)[i]);
1547             }
1548         }
1549     }
1550 
1551     if (IsPrt(swizzleMode))
1552     {
1553         // Clear out bits above the block size if prt's are enabled
1554         dataEq.resize(blockSizeLog2);
1555         dataEq.resize(48);
1556     }
1557 
1558     if (IsXor(swizzleMode))
1559     {
1560         CoordEq xorMask;
1561 
1562         if (IsThick(resourceType, swizzleMode))
1563         {
1564             CoordEq xorMask2;
1565 
1566             dataEq.copy(xorMask2, pipeInterleaveLog2 + numPipeLog2, 2 * numPipeLog2);
1567 
1568             xorMask.resize(numPipeLog2);
1569 
1570             for (UINT_32 pipeIdx = 0; pipeIdx < numPipeLog2; pipeIdx++)
1571             {
1572                 xorMask[pipeIdx].add(xorMask2[2 * pipeIdx]);
1573                 xorMask[pipeIdx].add(xorMask2[2 * pipeIdx + 1]);
1574             }
1575         }
1576         else
1577         {
1578             // Xor in the bits above the pipe+gpu bits
1579             dataEq.copy(xorMask, pipeInterleaveLog2 + pipeStart + numPipeLog2, numPipeLog2);
1580 
1581             if ((numSamplesLog2 == 0) && (IsPrt(swizzleMode) == FALSE))
1582             {
1583                 Coordinate co;
1584                 CoordEq xorMask2;
1585                 // if 1xaa and not prt, then xor in the z bits
1586                 xorMask2.resize(0);
1587                 xorMask2.resize(numPipeLog2);
1588                 for (UINT_32 pipeIdx = 0; pipeIdx < numPipeLog2; pipeIdx++)
1589                 {
1590                     co.set('z', numPipeLog2 - 1 - pipeIdx);
1591                     xorMask2[pipeIdx].add(co);
1592                 }
1593 
1594                 pPipeEq->xorin(xorMask2);
1595             }
1596         }
1597 
1598         xorMask.reverse();
1599         pPipeEq->xorin(xorMask);
1600     }
1601 }
1602 /**
1603 ************************************************************************************************************************
1604 *   Gfx9Lib::GetMetaEquation
1605 *
1606 *   @brief
1607 *       Get meta equation for cmask/htile/DCC
1608 *   @return
1609 *       Pointer to a calculated meta equation
1610 ************************************************************************************************************************
1611 */
GetMetaEquation(const MetaEqParams & metaEqParams)1612 const CoordEq* Gfx9Lib::GetMetaEquation(
1613     const MetaEqParams& metaEqParams)
1614 {
1615     UINT_32 cachedMetaEqIndex;
1616 
1617     for (cachedMetaEqIndex = 0; cachedMetaEqIndex < MaxCachedMetaEq; cachedMetaEqIndex++)
1618     {
1619         if (memcmp(&metaEqParams,
1620                    &m_cachedMetaEqKey[cachedMetaEqIndex],
1621                    static_cast<UINT_32>(sizeof(metaEqParams))) == 0)
1622         {
1623             break;
1624         }
1625     }
1626 
1627     CoordEq* pMetaEq = NULL;
1628 
1629     if (cachedMetaEqIndex < MaxCachedMetaEq)
1630     {
1631         pMetaEq = &m_cachedMetaEq[cachedMetaEqIndex];
1632     }
1633     else
1634     {
1635         m_cachedMetaEqKey[m_metaEqOverrideIndex] = metaEqParams;
1636 
1637         pMetaEq = &m_cachedMetaEq[m_metaEqOverrideIndex++];
1638 
1639         m_metaEqOverrideIndex %= MaxCachedMetaEq;
1640 
1641         GenMetaEquation(pMetaEq,
1642                         metaEqParams.maxMip,
1643                         metaEqParams.elementBytesLog2,
1644                         metaEqParams.numSamplesLog2,
1645                         metaEqParams.metaFlag,
1646                         metaEqParams.dataSurfaceType,
1647                         metaEqParams.swizzleMode,
1648                         metaEqParams.resourceType,
1649                         metaEqParams.metaBlkWidthLog2,
1650                         metaEqParams.metaBlkHeightLog2,
1651                         metaEqParams.metaBlkDepthLog2,
1652                         metaEqParams.compBlkWidthLog2,
1653                         metaEqParams.compBlkHeightLog2,
1654                         metaEqParams.compBlkDepthLog2);
1655     }
1656 
1657     return pMetaEq;
1658 }
1659 
1660 /**
1661 ************************************************************************************************************************
1662 *   Gfx9Lib::GenMetaEquation
1663 *
1664 *   @brief
1665 *       Get meta equation for cmask/htile/DCC
1666 *   @return
1667 *       N/A
1668 ************************************************************************************************************************
1669 */
GenMetaEquation(CoordEq * pMetaEq,UINT_32 maxMip,UINT_32 elementBytesLog2,UINT_32 numSamplesLog2,ADDR2_META_FLAGS metaFlag,Gfx9DataType dataSurfaceType,AddrSwizzleMode swizzleMode,AddrResourceType resourceType,UINT_32 metaBlkWidthLog2,UINT_32 metaBlkHeightLog2,UINT_32 metaBlkDepthLog2,UINT_32 compBlkWidthLog2,UINT_32 compBlkHeightLog2,UINT_32 compBlkDepthLog2) const1670 VOID Gfx9Lib::GenMetaEquation(
1671     CoordEq*         pMetaEq,               ///< [out] meta equation
1672     UINT_32          maxMip,                ///< [in] max mip Id
1673     UINT_32          elementBytesLog2,      ///< [in] data surface element bytes
1674     UINT_32          numSamplesLog2,        ///< [in] data surface sample count
1675     ADDR2_META_FLAGS metaFlag,              ///< [in] meta falg
1676     Gfx9DataType     dataSurfaceType,       ///< [in] data surface type
1677     AddrSwizzleMode  swizzleMode,           ///< [in] data surface swizzle mode
1678     AddrResourceType resourceType,          ///< [in] data surface resource type
1679     UINT_32          metaBlkWidthLog2,      ///< [in] meta block width
1680     UINT_32          metaBlkHeightLog2,     ///< [in] meta block height
1681     UINT_32          metaBlkDepthLog2,      ///< [in] meta block depth
1682     UINT_32          compBlkWidthLog2,      ///< [in] compress block width
1683     UINT_32          compBlkHeightLog2,     ///< [in] compress block height
1684     UINT_32          compBlkDepthLog2)      ///< [in] compress block depth
1685     const
1686 {
1687     UINT_32 numPipeTotalLog2   = GetPipeLog2ForMetaAddressing(metaFlag.pipeAligned, swizzleMode);
1688     UINT_32 pipeInterleaveLog2 = m_pipeInterleaveLog2;
1689 
1690     // Get the correct data address and rb equation
1691     CoordEq dataEq;
1692     GetDataEquation(&dataEq, dataSurfaceType, swizzleMode, resourceType,
1693                     elementBytesLog2, numSamplesLog2);
1694 
1695     // Get pipe and rb equations
1696     CoordEq pipeEquation;
1697     GetPipeEquation(&pipeEquation, &dataEq, pipeInterleaveLog2, numPipeTotalLog2,
1698                     numSamplesLog2, dataSurfaceType, swizzleMode, resourceType);
1699     numPipeTotalLog2 = pipeEquation.getsize();
1700 
1701     if (metaFlag.linear)
1702     {
1703         // Linear metadata supporting was removed for GFX9! No one can use this feature.
1704         ADDR_ASSERT_ALWAYS();
1705 
1706         ADDR_ASSERT(dataSurfaceType == Gfx9DataColor);
1707 
1708         dataEq.copy(*pMetaEq);
1709 
1710         if (IsLinear(swizzleMode))
1711         {
1712             if (metaFlag.pipeAligned)
1713             {
1714                 // Remove the pipe bits
1715                 INT_32 shift = static_cast<INT_32>(numPipeTotalLog2);
1716                 pMetaEq->shift(-shift, pipeInterleaveLog2);
1717             }
1718             // Divide by comp block size, which for linear (which is always color) is 256 B
1719             pMetaEq->shift(-8);
1720 
1721             if (metaFlag.pipeAligned)
1722             {
1723                 // Put pipe bits back in
1724                 pMetaEq->shift(numPipeTotalLog2, pipeInterleaveLog2);
1725 
1726                 for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
1727                 {
1728                     pipeEquation[i].copyto((*pMetaEq)[pipeInterleaveLog2 + i]);
1729                 }
1730             }
1731         }
1732 
1733         pMetaEq->shift(1);
1734     }
1735     else
1736     {
1737         UINT_32 maxCompFragLog2 = static_cast<INT_32>(m_maxCompFragLog2);
1738         UINT_32 compFragLog2 =
1739             ((dataSurfaceType == Gfx9DataColor) && (numSamplesLog2 > maxCompFragLog2)) ?
1740             maxCompFragLog2 : numSamplesLog2;
1741 
1742         UINT_32 uncompFragLog2 = numSamplesLog2 - compFragLog2;
1743 
1744         // Make sure the metaaddr is cleared
1745         pMetaEq->resize(0);
1746         pMetaEq->resize(27);
1747 
1748         if (IsThick(resourceType, swizzleMode))
1749         {
1750             Coordinate cx('x', 0);
1751             Coordinate cy('y', 0);
1752             Coordinate cz('z', 0);
1753 
1754             if (maxMip > 0)
1755             {
1756                 pMetaEq->mort3d(cy, cx, cz);
1757             }
1758             else
1759             {
1760                 pMetaEq->mort3d(cx, cy, cz);
1761             }
1762         }
1763         else
1764         {
1765             Coordinate cx('x', 0);
1766             Coordinate cy('y', 0);
1767             Coordinate cs;
1768 
1769             if (maxMip > 0)
1770             {
1771                 pMetaEq->mort2d(cy, cx, compFragLog2);
1772             }
1773             else
1774             {
1775                 pMetaEq->mort2d(cx, cy, compFragLog2);
1776             }
1777 
1778             //------------------------------------------------------------------------------------------------------------------------
1779             // Put the compressible fragments at the lsb
1780             // the uncompressible frags will be at the msb of the micro address
1781             //------------------------------------------------------------------------------------------------------------------------
1782             for (UINT_32 s = 0; s < compFragLog2; s++)
1783             {
1784                 cs.set('s', s);
1785                 (*pMetaEq)[s].add(cs);
1786             }
1787         }
1788 
1789         // Keep a copy of the pipe equations
1790         CoordEq origPipeEquation;
1791         pipeEquation.copy(origPipeEquation);
1792 
1793         Coordinate co;
1794         // filter out everything under the compressed block size
1795         co.set('x', compBlkWidthLog2);
1796         pMetaEq->Filter('<', co, 0, 'x');
1797         co.set('y', compBlkHeightLog2);
1798         pMetaEq->Filter('<', co, 0, 'y');
1799         co.set('z', compBlkDepthLog2);
1800         pMetaEq->Filter('<', co, 0, 'z');
1801 
1802         // For non-color, filter out sample bits
1803         if (dataSurfaceType != Gfx9DataColor)
1804         {
1805             co.set('x', 0);
1806             pMetaEq->Filter('<', co, 0, 's');
1807         }
1808 
1809         // filter out everything above the metablock size
1810         co.set('x', metaBlkWidthLog2 - 1);
1811         pMetaEq->Filter('>', co, 0, 'x');
1812         co.set('y', metaBlkHeightLog2 - 1);
1813         pMetaEq->Filter('>', co, 0, 'y');
1814         co.set('z', metaBlkDepthLog2 - 1);
1815         pMetaEq->Filter('>', co, 0, 'z');
1816 
1817         // filter out everything above the metablock size for the channel bits
1818         co.set('x', metaBlkWidthLog2 - 1);
1819         pipeEquation.Filter('>', co, 0, 'x');
1820         co.set('y', metaBlkHeightLog2 - 1);
1821         pipeEquation.Filter('>', co, 0, 'y');
1822         co.set('z', metaBlkDepthLog2 - 1);
1823         pipeEquation.Filter('>', co, 0, 'z');
1824 
1825         // Make sure we still have the same number of channel bits
1826         if (pipeEquation.getsize() != numPipeTotalLog2)
1827         {
1828             ADDR_ASSERT_ALWAYS();
1829         }
1830 
1831         // Loop through all channel and rb bits,
1832         // and make sure these components exist in the metadata address
1833         for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
1834         {
1835             for (UINT_32 j = pipeEquation[i].getsize(); j > 0; j--)
1836             {
1837                 if (pMetaEq->Exists(pipeEquation[i][j - 1]) == FALSE)
1838                 {
1839                     ADDR_ASSERT_ALWAYS();
1840                 }
1841             }
1842         }
1843 
1844         const UINT_32 numSeLog2     = metaFlag.rbAligned ? m_seLog2      : 0;
1845         const UINT_32 numRbPeSeLog2 = metaFlag.rbAligned ? m_rbPerSeLog2 : 0;
1846         const UINT_32 numRbTotalLog2 = numRbPeSeLog2 + numSeLog2;
1847         CoordEq       origRbEquation;
1848 
1849         GetRbEquation(&origRbEquation, numRbPeSeLog2, numSeLog2);
1850 
1851         CoordEq rbEquation = origRbEquation;
1852 
1853         for (UINT_32 i = 0; i < numRbTotalLog2; i++)
1854         {
1855             for (UINT_32 j = rbEquation[i].getsize(); j > 0; j--)
1856             {
1857                 if (pMetaEq->Exists(rbEquation[i][j - 1]) == FALSE)
1858                 {
1859                     ADDR_ASSERT_ALWAYS();
1860                 }
1861             }
1862         }
1863 
1864         if (m_settings.applyAliasFix)
1865         {
1866             co.set('z', -1);
1867         }
1868 
1869         // Loop through each rb id bit; if it is equal to any of the filtered channel bits, clear it
1870         for (UINT_32 i = 0; i < numRbTotalLog2; i++)
1871         {
1872             for (UINT_32 j = 0; j < numPipeTotalLog2; j++)
1873             {
1874                 BOOL_32 isRbEquationInPipeEquation = FALSE;
1875 
1876                 if (m_settings.applyAliasFix)
1877                 {
1878                     CoordTerm filteredPipeEq;
1879                     filteredPipeEq = pipeEquation[j];
1880 
1881                     filteredPipeEq.Filter('>', co, 0, 'z');
1882 
1883                     isRbEquationInPipeEquation = (rbEquation[i] == filteredPipeEq);
1884                 }
1885                 else
1886                 {
1887                     isRbEquationInPipeEquation = (rbEquation[i] == pipeEquation[j]);
1888                 }
1889 
1890                 if (isRbEquationInPipeEquation)
1891                 {
1892                     rbEquation[i].Clear();
1893                 }
1894             }
1895         }
1896 
1897          bool rbAppendedWithPipeBits[1 << (MaxSeLog2 + MaxRbPerSeLog2)] = {};
1898 
1899         // Loop through each bit of the channel, get the smallest coordinate,
1900         // and remove it from the metaaddr, and rb_equation
1901         for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
1902         {
1903             pipeEquation[i].getsmallest(co);
1904 
1905             UINT_32 old_size = pMetaEq->getsize();
1906             pMetaEq->Filter('=', co);
1907             UINT_32 new_size = pMetaEq->getsize();
1908             if (new_size != old_size-1)
1909             {
1910                 ADDR_ASSERT_ALWAYS();
1911             }
1912             pipeEquation.remove(co);
1913             for (UINT_32 j = 0; j < numRbTotalLog2; j++)
1914             {
1915                 if (rbEquation[j].remove(co))
1916                 {
1917                     // if we actually removed something from this bit, then add the remaining
1918                     // channel bits, as these can be removed for this bit
1919                     for (UINT_32 k = 0; k < pipeEquation[i].getsize(); k++)
1920                     {
1921                         if (pipeEquation[i][k] != co)
1922                         {
1923                             rbEquation[j].add(pipeEquation[i][k]);
1924                             rbAppendedWithPipeBits[j] = true;
1925                         }
1926                     }
1927                 }
1928             }
1929         }
1930 
1931         // Loop through the rb bits and see what remain;
1932         // filter out the smallest coordinate if it remains
1933         UINT_32 rbBitsLeft = 0;
1934         for (UINT_32 i = 0; i < numRbTotalLog2; i++)
1935         {
1936             BOOL_32 isRbEqAppended = FALSE;
1937 
1938             if (m_settings.applyAliasFix)
1939             {
1940                 isRbEqAppended = (rbEquation[i].getsize() > (rbAppendedWithPipeBits[i] ? 1 : 0));
1941             }
1942             else
1943             {
1944                 isRbEqAppended = (rbEquation[i].getsize() > 0);
1945             }
1946 
1947             if (isRbEqAppended)
1948             {
1949                 rbBitsLeft++;
1950                 rbEquation[i].getsmallest(co);
1951                 UINT_32 old_size = pMetaEq->getsize();
1952                 pMetaEq->Filter('=', co);
1953                 UINT_32 new_size = pMetaEq->getsize();
1954                 if (new_size != old_size - 1)
1955                 {
1956                     // assert warning
1957                 }
1958                 for (UINT_32 j = i + 1; j < numRbTotalLog2; j++)
1959                 {
1960                     if (rbEquation[j].remove(co))
1961                     {
1962                         // if we actually removed something from this bit, then add the remaining
1963                         // rb bits, as these can be removed for this bit
1964                         for (UINT_32 k = 0; k < rbEquation[i].getsize(); k++)
1965                         {
1966                             if (rbEquation[i][k] != co)
1967                             {
1968                                 rbEquation[j].add(rbEquation[i][k]);
1969                                 rbAppendedWithPipeBits[j] |= rbAppendedWithPipeBits[i];
1970                             }
1971                         }
1972                     }
1973                 }
1974             }
1975         }
1976 
1977         // capture the size of the metaaddr
1978         UINT_32 metaSize = pMetaEq->getsize();
1979         // resize to 49 bits...make this a nibble address
1980         pMetaEq->resize(49);
1981         // Concatenate the macro address above the current address
1982         for (UINT_32 i = metaSize, j = 0; i < 49; i++, j++)
1983         {
1984             co.set('m', j);
1985             (*pMetaEq)[i].add(co);
1986         }
1987 
1988         // Multiply by meta element size (in nibbles)
1989         if (dataSurfaceType == Gfx9DataColor)
1990         {
1991             pMetaEq->shift(1);
1992         }
1993         else if (dataSurfaceType == Gfx9DataDepthStencil)
1994         {
1995             pMetaEq->shift(3);
1996         }
1997 
1998         //------------------------------------------------------------------------------------------
1999         // Note the pipeInterleaveLog2+1 is because address is a nibble address
2000         // Shift up from pipe interleave number of channel
2001         // and rb bits left, and uncompressed fragments
2002         //------------------------------------------------------------------------------------------
2003 
2004         pMetaEq->shift(numPipeTotalLog2 + rbBitsLeft + uncompFragLog2, pipeInterleaveLog2 + 1);
2005 
2006         // Put in the channel bits
2007         for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
2008         {
2009             origPipeEquation[i].copyto((*pMetaEq)[pipeInterleaveLog2+1 + i]);
2010         }
2011 
2012         // Put in remaining rb bits
2013         for (UINT_32 i = 0, j = 0; j < rbBitsLeft; i = (i + 1) % numRbTotalLog2)
2014         {
2015             BOOL_32 isRbEqAppended = FALSE;
2016 
2017             if (m_settings.applyAliasFix)
2018             {
2019                 isRbEqAppended = (rbEquation[i].getsize() > (rbAppendedWithPipeBits[i] ? 1 : 0));
2020             }
2021             else
2022             {
2023                 isRbEqAppended = (rbEquation[i].getsize() > 0);
2024             }
2025 
2026             if (isRbEqAppended)
2027             {
2028                 origRbEquation[i].copyto((*pMetaEq)[pipeInterleaveLog2 + 1 + numPipeTotalLog2 + j]);
2029                 // Mark any rb bit we add in to the rb mask
2030                 j++;
2031             }
2032         }
2033 
2034         //------------------------------------------------------------------------------------------
2035         // Put in the uncompressed fragment bits
2036         //------------------------------------------------------------------------------------------
2037         for (UINT_32 i = 0; i < uncompFragLog2; i++)
2038         {
2039             co.set('s', compFragLog2 + i);
2040             (*pMetaEq)[pipeInterleaveLog2 + 1 + numPipeTotalLog2 + rbBitsLeft + i].add(co);
2041         }
2042     }
2043 }
2044 
2045 /**
2046 ************************************************************************************************************************
2047 *   Gfx9Lib::IsEquationSupported
2048 *
2049 *   @brief
2050 *       Check if equation is supported for given swizzle mode and resource type.
2051 *
2052 *   @return
2053 *       TRUE if supported
2054 ************************************************************************************************************************
2055 */
IsEquationSupported(AddrResourceType rsrcType,AddrSwizzleMode swMode,UINT_32 elementBytesLog2) const2056 BOOL_32 Gfx9Lib::IsEquationSupported(
2057     AddrResourceType rsrcType,
2058     AddrSwizzleMode  swMode,
2059     UINT_32          elementBytesLog2) const
2060 {
2061     BOOL_32 supported = (elementBytesLog2 < MaxElementBytesLog2) &&
2062                         (IsLinear(swMode) == FALSE) &&
2063                         (((IsTex2d(rsrcType) == TRUE) &&
2064                           ((elementBytesLog2 < 4) ||
2065                            ((IsRotateSwizzle(swMode) == FALSE) &&
2066                             (IsZOrderSwizzle(swMode) == FALSE)))) ||
2067                          ((IsTex3d(rsrcType) == TRUE) &&
2068                           (IsRotateSwizzle(swMode) == FALSE) &&
2069                           (IsBlock256b(swMode) == FALSE)));
2070 
2071     return supported;
2072 }
2073 
2074 /**
2075 ************************************************************************************************************************
2076 *   Gfx9Lib::InitEquationTable
2077 *
2078 *   @brief
2079 *       Initialize Equation table.
2080 *
2081 *   @return
2082 *       N/A
2083 ************************************************************************************************************************
2084 */
InitEquationTable()2085 VOID Gfx9Lib::InitEquationTable()
2086 {
2087     memset(m_equationTable, 0, sizeof(m_equationTable));
2088 
2089     // Loop all possible resource type (2D/3D)
2090     for (UINT_32 rsrcTypeIdx = 0; rsrcTypeIdx < MaxRsrcType; rsrcTypeIdx++)
2091     {
2092         AddrResourceType rsrcType = static_cast<AddrResourceType>(rsrcTypeIdx + ADDR_RSRC_TEX_2D);
2093 
2094         // Loop all possible swizzle mode
2095         for (UINT_32 swModeIdx = 0; swModeIdx < MaxSwMode; swModeIdx++)
2096         {
2097             AddrSwizzleMode swMode = static_cast<AddrSwizzleMode>(swModeIdx);
2098 
2099             // Loop all possible bpp
2100             for (UINT_32 bppIdx = 0; bppIdx < MaxElementBytesLog2; bppIdx++)
2101             {
2102                 UINT_32 equationIndex = ADDR_INVALID_EQUATION_INDEX;
2103 
2104                 // Check if the input is supported
2105                 if (IsEquationSupported(rsrcType, swMode, bppIdx))
2106                 {
2107                     ADDR_EQUATION equation;
2108                     ADDR_E_RETURNCODE retCode;
2109 
2110                     memset(&equation, 0, sizeof(ADDR_EQUATION));
2111 
2112                     // Generate the equation
2113                     if (IsBlock256b(swMode) && IsTex2d(rsrcType))
2114                     {
2115                         retCode = ComputeBlock256Equation(rsrcType, swMode, bppIdx, &equation);
2116                     }
2117                     else if (IsThin(rsrcType, swMode))
2118                     {
2119                         retCode = ComputeThinEquation(rsrcType, swMode, bppIdx, &equation);
2120                     }
2121                     else
2122                     {
2123                         retCode = ComputeThickEquation(rsrcType, swMode, bppIdx, &equation);
2124                     }
2125 
2126                     // Only fill the equation into the table if the return code is ADDR_OK,
2127                     // otherwise if the return code is not ADDR_OK, it indicates this is not
2128                     // a valid input, we do nothing but just fill invalid equation index
2129                     // into the lookup table.
2130                     if (retCode == ADDR_OK)
2131                     {
2132                         equationIndex = m_numEquations;
2133                         ADDR_ASSERT(equationIndex < EquationTableSize);
2134 
2135                         m_equationTable[equationIndex] = equation;
2136 
2137                         m_numEquations++;
2138                     }
2139                     else
2140                     {
2141                         ADDR_ASSERT_ALWAYS();
2142                     }
2143                 }
2144 
2145                 // Fill the index into the lookup table, if the combination is not supported
2146                 // fill the invalid equation index
2147                 m_equationLookupTable[rsrcTypeIdx][swModeIdx][bppIdx] = equationIndex;
2148             }
2149         }
2150     }
2151 }
2152 
2153 /**
2154 ************************************************************************************************************************
2155 *   Gfx9Lib::HwlGetEquationIndex
2156 *
2157 *   @brief
2158 *       Interface function stub of GetEquationIndex
2159 *
2160 *   @return
2161 *       ADDR_E_RETURNCODE
2162 ************************************************************************************************************************
2163 */
HwlGetEquationIndex(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,ADDR2_COMPUTE_SURFACE_INFO_OUTPUT * pOut) const2164 UINT_32 Gfx9Lib::HwlGetEquationIndex(
2165     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,
2166     ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut
2167     ) const
2168 {
2169     AddrResourceType rsrcType         = pIn->resourceType;
2170     AddrSwizzleMode  swMode           = pIn->swizzleMode;
2171     UINT_32          elementBytesLog2 = Log2(pIn->bpp >> 3);
2172     UINT_32          index            = ADDR_INVALID_EQUATION_INDEX;
2173 
2174     if (IsEquationSupported(rsrcType, swMode, elementBytesLog2))
2175     {
2176         UINT_32 rsrcTypeIdx = static_cast<UINT_32>(rsrcType) - 1;
2177         UINT_32 swModeIdx   = static_cast<UINT_32>(swMode);
2178 
2179         index = m_equationLookupTable[rsrcTypeIdx][swModeIdx][elementBytesLog2];
2180     }
2181 
2182     if (pOut->pMipInfo != NULL)
2183     {
2184         for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
2185         {
2186             pOut->pMipInfo[i].equationIndex = index;
2187         }
2188     }
2189 
2190     return index;
2191 }
2192 
2193 /**
2194 ************************************************************************************************************************
2195 *   Gfx9Lib::HwlComputeBlock256Equation
2196 *
2197 *   @brief
2198 *       Interface function stub of ComputeBlock256Equation
2199 *
2200 *   @return
2201 *       ADDR_E_RETURNCODE
2202 ************************************************************************************************************************
2203 */
HwlComputeBlock256Equation(AddrResourceType rsrcType,AddrSwizzleMode swMode,UINT_32 elementBytesLog2,ADDR_EQUATION * pEquation) const2204 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeBlock256Equation(
2205     AddrResourceType rsrcType,
2206     AddrSwizzleMode  swMode,
2207     UINT_32          elementBytesLog2,
2208     ADDR_EQUATION*   pEquation) const
2209 {
2210     ADDR_E_RETURNCODE ret = ADDR_OK;
2211 
2212     pEquation->numBits = 8;
2213 
2214     UINT_32 i = 0;
2215     for (; i < elementBytesLog2; i++)
2216     {
2217         InitChannel(1, 0 , i, &pEquation->addr[i]);
2218     }
2219 
2220     ADDR_CHANNEL_SETTING* pixelBit = &pEquation->addr[elementBytesLog2];
2221 
2222     const UINT_32 maxBitsUsed = 4;
2223     ADDR_CHANNEL_SETTING x[maxBitsUsed] = {};
2224     ADDR_CHANNEL_SETTING y[maxBitsUsed] = {};
2225 
2226     for (i = 0; i < maxBitsUsed; i++)
2227     {
2228         InitChannel(1, 0, elementBytesLog2 + i, &x[i]);
2229         InitChannel(1, 1, i, &y[i]);
2230     }
2231 
2232     if (IsStandardSwizzle(rsrcType, swMode))
2233     {
2234         switch (elementBytesLog2)
2235         {
2236             case 0:
2237                 pixelBit[0] = x[0];
2238                 pixelBit[1] = x[1];
2239                 pixelBit[2] = x[2];
2240                 pixelBit[3] = x[3];
2241                 pixelBit[4] = y[0];
2242                 pixelBit[5] = y[1];
2243                 pixelBit[6] = y[2];
2244                 pixelBit[7] = y[3];
2245                 break;
2246             case 1:
2247                 pixelBit[0] = x[0];
2248                 pixelBit[1] = x[1];
2249                 pixelBit[2] = x[2];
2250                 pixelBit[3] = y[0];
2251                 pixelBit[4] = y[1];
2252                 pixelBit[5] = y[2];
2253                 pixelBit[6] = x[3];
2254                 break;
2255             case 2:
2256                 pixelBit[0] = x[0];
2257                 pixelBit[1] = x[1];
2258                 pixelBit[2] = y[0];
2259                 pixelBit[3] = y[1];
2260                 pixelBit[4] = y[2];
2261                 pixelBit[5] = x[2];
2262                 break;
2263             case 3:
2264                 pixelBit[0] = x[0];
2265                 pixelBit[1] = y[0];
2266                 pixelBit[2] = y[1];
2267                 pixelBit[3] = x[1];
2268                 pixelBit[4] = x[2];
2269                 break;
2270             case 4:
2271                 pixelBit[0] = y[0];
2272                 pixelBit[1] = y[1];
2273                 pixelBit[2] = x[0];
2274                 pixelBit[3] = x[1];
2275                 break;
2276             default:
2277                 ADDR_ASSERT_ALWAYS();
2278                 ret = ADDR_INVALIDPARAMS;
2279                 break;
2280         }
2281     }
2282     else if (IsDisplaySwizzle(rsrcType, swMode))
2283     {
2284         switch (elementBytesLog2)
2285         {
2286             case 0:
2287                 pixelBit[0] = x[0];
2288                 pixelBit[1] = x[1];
2289                 pixelBit[2] = x[2];
2290                 pixelBit[3] = y[1];
2291                 pixelBit[4] = y[0];
2292                 pixelBit[5] = y[2];
2293                 pixelBit[6] = x[3];
2294                 pixelBit[7] = y[3];
2295                 break;
2296             case 1:
2297                 pixelBit[0] = x[0];
2298                 pixelBit[1] = x[1];
2299                 pixelBit[2] = x[2];
2300                 pixelBit[3] = y[0];
2301                 pixelBit[4] = y[1];
2302                 pixelBit[5] = y[2];
2303                 pixelBit[6] = x[3];
2304                 break;
2305             case 2:
2306                 pixelBit[0] = x[0];
2307                 pixelBit[1] = x[1];
2308                 pixelBit[2] = y[0];
2309                 pixelBit[3] = x[2];
2310                 pixelBit[4] = y[1];
2311                 pixelBit[5] = y[2];
2312                 break;
2313             case 3:
2314                 pixelBit[0] = x[0];
2315                 pixelBit[1] = y[0];
2316                 pixelBit[2] = x[1];
2317                 pixelBit[3] = x[2];
2318                 pixelBit[4] = y[1];
2319                 break;
2320             case 4:
2321                 pixelBit[0] = x[0];
2322                 pixelBit[1] = y[0];
2323                 pixelBit[2] = x[1];
2324                 pixelBit[3] = y[1];
2325                 break;
2326             default:
2327                 ADDR_ASSERT_ALWAYS();
2328                 ret = ADDR_INVALIDPARAMS;
2329                 break;
2330         }
2331     }
2332     else if (IsRotateSwizzle(swMode))
2333     {
2334         switch (elementBytesLog2)
2335         {
2336             case 0:
2337                 pixelBit[0] = y[0];
2338                 pixelBit[1] = y[1];
2339                 pixelBit[2] = y[2];
2340                 pixelBit[3] = x[1];
2341                 pixelBit[4] = x[0];
2342                 pixelBit[5] = x[2];
2343                 pixelBit[6] = x[3];
2344                 pixelBit[7] = y[3];
2345                 break;
2346             case 1:
2347                 pixelBit[0] = y[0];
2348                 pixelBit[1] = y[1];
2349                 pixelBit[2] = y[2];
2350                 pixelBit[3] = x[0];
2351                 pixelBit[4] = x[1];
2352                 pixelBit[5] = x[2];
2353                 pixelBit[6] = x[3];
2354                 break;
2355             case 2:
2356                 pixelBit[0] = y[0];
2357                 pixelBit[1] = y[1];
2358                 pixelBit[2] = x[0];
2359                 pixelBit[3] = y[2];
2360                 pixelBit[4] = x[1];
2361                 pixelBit[5] = x[2];
2362                 break;
2363             case 3:
2364                 pixelBit[0] = y[0];
2365                 pixelBit[1] = x[0];
2366                 pixelBit[2] = y[1];
2367                 pixelBit[3] = x[1];
2368                 pixelBit[4] = x[2];
2369                 break;
2370             default:
2371                 ADDR_ASSERT_ALWAYS();
2372             case 4:
2373                 ret = ADDR_INVALIDPARAMS;
2374                 break;
2375         }
2376     }
2377     else
2378     {
2379         ADDR_ASSERT_ALWAYS();
2380         ret = ADDR_INVALIDPARAMS;
2381     }
2382 
2383     // Post validation
2384     if (ret == ADDR_OK)
2385     {
2386         Dim2d microBlockDim = Block256_2d[elementBytesLog2];
2387         ADDR_ASSERT((2u << GetMaxValidChannelIndex(pEquation->addr, 8, 0)) ==
2388                     (microBlockDim.w * (1 << elementBytesLog2)));
2389         ADDR_ASSERT((2u << GetMaxValidChannelIndex(pEquation->addr, 8, 1)) == microBlockDim.h);
2390     }
2391 
2392     return ret;
2393 }
2394 
2395 /**
2396 ************************************************************************************************************************
2397 *   Gfx9Lib::HwlComputeThinEquation
2398 *
2399 *   @brief
2400 *       Interface function stub of ComputeThinEquation
2401 *
2402 *   @return
2403 *       ADDR_E_RETURNCODE
2404 ************************************************************************************************************************
2405 */
HwlComputeThinEquation(AddrResourceType rsrcType,AddrSwizzleMode swMode,UINT_32 elementBytesLog2,ADDR_EQUATION * pEquation) const2406 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeThinEquation(
2407     AddrResourceType rsrcType,
2408     AddrSwizzleMode  swMode,
2409     UINT_32          elementBytesLog2,
2410     ADDR_EQUATION*   pEquation) const
2411 {
2412     ADDR_E_RETURNCODE ret = ADDR_OK;
2413 
2414     UINT_32 blockSizeLog2 = GetBlockSizeLog2(swMode);
2415 
2416     UINT_32 maxXorBits = blockSizeLog2;
2417     if (IsNonPrtXor(swMode))
2418     {
2419         // For non-prt-xor, maybe need to initialize some more bits for xor
2420         // The highest xor bit used in equation will be max the following 3 items:
2421         // 1. m_pipeInterleaveLog2 + 2 * pipeXorBits
2422         // 2. m_pipeInterleaveLog2 + pipeXorBits + 2 * bankXorBits
2423         // 3. blockSizeLog2
2424 
2425         maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 + 2 * GetPipeXorBits(blockSizeLog2));
2426         maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 +
2427                                      GetPipeXorBits(blockSizeLog2) +
2428                                      2 * GetBankXorBits(blockSizeLog2));
2429     }
2430 
2431     const UINT_32 maxBitsUsed = 14;
2432     ADDR_ASSERT((2 * maxBitsUsed) >= maxXorBits);
2433     ADDR_CHANNEL_SETTING x[maxBitsUsed] = {};
2434     ADDR_CHANNEL_SETTING y[maxBitsUsed] = {};
2435 
2436     const UINT_32 extraXorBits = 16;
2437     ADDR_ASSERT(extraXorBits >= maxXorBits - blockSizeLog2);
2438     ADDR_CHANNEL_SETTING xorExtra[extraXorBits] = {};
2439 
2440     for (UINT_32 i = 0; i < maxBitsUsed; i++)
2441     {
2442         InitChannel(1, 0, elementBytesLog2 + i, &x[i]);
2443         InitChannel(1, 1, i, &y[i]);
2444     }
2445 
2446     ADDR_CHANNEL_SETTING* pixelBit = pEquation->addr;
2447 
2448     for (UINT_32 i = 0; i < elementBytesLog2; i++)
2449     {
2450         InitChannel(1, 0 , i, &pixelBit[i]);
2451     }
2452 
2453     UINT_32 xIdx = 0;
2454     UINT_32 yIdx = 0;
2455     UINT_32 lowBits = 0;
2456 
2457     if (IsZOrderSwizzle(swMode))
2458     {
2459         if (elementBytesLog2 <= 3)
2460         {
2461             for (UINT_32 i = elementBytesLog2; i < 6; i++)
2462             {
2463                 pixelBit[i] = (((i - elementBytesLog2) & 1) == 0) ? x[xIdx++] : y[yIdx++];
2464             }
2465 
2466             lowBits = 6;
2467         }
2468         else
2469         {
2470             ret = ADDR_INVALIDPARAMS;
2471         }
2472     }
2473     else
2474     {
2475         ret = HwlComputeBlock256Equation(rsrcType, swMode, elementBytesLog2, pEquation);
2476 
2477         if (ret == ADDR_OK)
2478         {
2479             Dim2d microBlockDim = Block256_2d[elementBytesLog2];
2480             xIdx = Log2(microBlockDim.w);
2481             yIdx = Log2(microBlockDim.h);
2482             lowBits = 8;
2483         }
2484     }
2485 
2486     if (ret == ADDR_OK)
2487     {
2488         for (UINT_32 i = lowBits; i < blockSizeLog2; i++)
2489         {
2490             pixelBit[i] = ((i & 1) == 0) ? y[yIdx++] : x[xIdx++];
2491         }
2492 
2493         for (UINT_32 i = blockSizeLog2; i < maxXorBits; i++)
2494         {
2495             xorExtra[i - blockSizeLog2] = ((i & 1) == 0) ? y[yIdx++] : x[xIdx++];
2496         }
2497 
2498         if (IsXor(swMode))
2499         {
2500             // Fill XOR bits
2501             UINT_32 pipeStart = m_pipeInterleaveLog2;
2502             UINT_32 pipeXorBits = GetPipeXorBits(blockSizeLog2);
2503 
2504             UINT_32 bankStart = pipeStart + pipeXorBits;
2505             UINT_32 bankXorBits = GetBankXorBits(blockSizeLog2);
2506 
2507             for (UINT_32 i = 0; i < pipeXorBits; i++)
2508             {
2509                 UINT_32               xor1BitPos = pipeStart + 2 * pipeXorBits - 1 - i;
2510                 ADDR_CHANNEL_SETTING* pXor1Src   = (xor1BitPos < blockSizeLog2) ?
2511                                                    &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
2512 
2513                 InitChannel(&pEquation->xor1[pipeStart + i], pXor1Src);
2514             }
2515 
2516             for (UINT_32 i = 0; i < bankXorBits; i++)
2517             {
2518                 UINT_32               xor1BitPos = bankStart + 2 * bankXorBits - 1 - i;
2519                 ADDR_CHANNEL_SETTING* pXor1Src   = (xor1BitPos < blockSizeLog2) ?
2520                                                    &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
2521 
2522                 InitChannel(&pEquation->xor1[bankStart + i], pXor1Src);
2523             }
2524 
2525             if (IsPrt(swMode) == FALSE)
2526             {
2527                 for (UINT_32 i = 0; i < pipeXorBits; i++)
2528                 {
2529                     InitChannel(1, 2, pipeXorBits - i - 1, &pEquation->xor2[pipeStart + i]);
2530                 }
2531 
2532                 for (UINT_32 i = 0; i < bankXorBits; i++)
2533                 {
2534                     InitChannel(1, 2, bankXorBits - i - 1 + pipeXorBits, &pEquation->xor2[bankStart + i]);
2535                 }
2536             }
2537         }
2538 
2539         pEquation->numBits = blockSizeLog2;
2540     }
2541 
2542     return ret;
2543 }
2544 
2545 /**
2546 ************************************************************************************************************************
2547 *   Gfx9Lib::HwlComputeThickEquation
2548 *
2549 *   @brief
2550 *       Interface function stub of ComputeThickEquation
2551 *
2552 *   @return
2553 *       ADDR_E_RETURNCODE
2554 ************************************************************************************************************************
2555 */
HwlComputeThickEquation(AddrResourceType rsrcType,AddrSwizzleMode swMode,UINT_32 elementBytesLog2,ADDR_EQUATION * pEquation) const2556 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeThickEquation(
2557     AddrResourceType rsrcType,
2558     AddrSwizzleMode  swMode,
2559     UINT_32          elementBytesLog2,
2560     ADDR_EQUATION*   pEquation) const
2561 {
2562     ADDR_E_RETURNCODE ret = ADDR_OK;
2563 
2564     ADDR_ASSERT(IsTex3d(rsrcType));
2565 
2566     UINT_32 blockSizeLog2 = GetBlockSizeLog2(swMode);
2567 
2568     UINT_32 maxXorBits = blockSizeLog2;
2569     if (IsNonPrtXor(swMode))
2570     {
2571         // For non-prt-xor, maybe need to initialize some more bits for xor
2572         // The highest xor bit used in equation will be max the following 3:
2573         // 1. m_pipeInterleaveLog2 + 3 * pipeXorBits
2574         // 2. m_pipeInterleaveLog2 + pipeXorBits + 3 * bankXorBits
2575         // 3. blockSizeLog2
2576 
2577         maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 + 3 * GetPipeXorBits(blockSizeLog2));
2578         maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 +
2579                                      GetPipeXorBits(blockSizeLog2) +
2580                                      3 * GetBankXorBits(blockSizeLog2));
2581     }
2582 
2583     for (UINT_32 i = 0; i < elementBytesLog2; i++)
2584     {
2585         InitChannel(1, 0 , i, &pEquation->addr[i]);
2586     }
2587 
2588     ADDR_CHANNEL_SETTING* pixelBit = &pEquation->addr[elementBytesLog2];
2589 
2590     const UINT_32 maxBitsUsed = 12;
2591     ADDR_ASSERT((3 * maxBitsUsed) >= maxXorBits);
2592     ADDR_CHANNEL_SETTING x[maxBitsUsed] = {};
2593     ADDR_CHANNEL_SETTING y[maxBitsUsed] = {};
2594     ADDR_CHANNEL_SETTING z[maxBitsUsed] = {};
2595 
2596     const UINT_32 extraXorBits = 24;
2597     ADDR_ASSERT(extraXorBits >= maxXorBits - blockSizeLog2);
2598     ADDR_CHANNEL_SETTING xorExtra[extraXorBits] = {};
2599 
2600     for (UINT_32 i = 0; i < maxBitsUsed; i++)
2601     {
2602         InitChannel(1, 0, elementBytesLog2 + i, &x[i]);
2603         InitChannel(1, 1, i, &y[i]);
2604         InitChannel(1, 2, i, &z[i]);
2605     }
2606 
2607     if (IsZOrderSwizzle(swMode))
2608     {
2609         switch (elementBytesLog2)
2610         {
2611             case 0:
2612                 pixelBit[0]  = x[0];
2613                 pixelBit[1]  = y[0];
2614                 pixelBit[2]  = x[1];
2615                 pixelBit[3]  = y[1];
2616                 pixelBit[4]  = z[0];
2617                 pixelBit[5]  = z[1];
2618                 pixelBit[6]  = x[2];
2619                 pixelBit[7]  = z[2];
2620                 pixelBit[8]  = y[2];
2621                 pixelBit[9]  = x[3];
2622                 break;
2623             case 1:
2624                 pixelBit[0]  = x[0];
2625                 pixelBit[1]  = y[0];
2626                 pixelBit[2]  = x[1];
2627                 pixelBit[3]  = y[1];
2628                 pixelBit[4]  = z[0];
2629                 pixelBit[5]  = z[1];
2630                 pixelBit[6]  = z[2];
2631                 pixelBit[7]  = y[2];
2632                 pixelBit[8]  = x[2];
2633                 break;
2634             case 2:
2635                 pixelBit[0]  = x[0];
2636                 pixelBit[1]  = y[0];
2637                 pixelBit[2]  = x[1];
2638                 pixelBit[3]  = z[0];
2639                 pixelBit[4]  = y[1];
2640                 pixelBit[5]  = z[1];
2641                 pixelBit[6]  = y[2];
2642                 pixelBit[7]  = x[2];
2643                 break;
2644             case 3:
2645                 pixelBit[0]  = x[0];
2646                 pixelBit[1]  = y[0];
2647                 pixelBit[2]  = z[0];
2648                 pixelBit[3]  = x[1];
2649                 pixelBit[4]  = z[1];
2650                 pixelBit[5]  = y[1];
2651                 pixelBit[6]  = x[2];
2652                 break;
2653             case 4:
2654                 pixelBit[0]  = x[0];
2655                 pixelBit[1]  = y[0];
2656                 pixelBit[2]  = z[0];
2657                 pixelBit[3]  = z[1];
2658                 pixelBit[4]  = y[1];
2659                 pixelBit[5]  = x[1];
2660                 break;
2661             default:
2662                 ADDR_ASSERT_ALWAYS();
2663                 ret = ADDR_INVALIDPARAMS;
2664                 break;
2665         }
2666     }
2667     else if (IsStandardSwizzle(rsrcType, swMode))
2668     {
2669         switch (elementBytesLog2)
2670         {
2671             case 0:
2672                 pixelBit[0]  = x[0];
2673                 pixelBit[1]  = x[1];
2674                 pixelBit[2]  = x[2];
2675                 pixelBit[3]  = x[3];
2676                 pixelBit[4]  = y[0];
2677                 pixelBit[5]  = y[1];
2678                 pixelBit[6]  = z[0];
2679                 pixelBit[7]  = z[1];
2680                 pixelBit[8]  = z[2];
2681                 pixelBit[9]  = y[2];
2682                 break;
2683             case 1:
2684                 pixelBit[0]  = x[0];
2685                 pixelBit[1]  = x[1];
2686                 pixelBit[2]  = x[2];
2687                 pixelBit[3]  = y[0];
2688                 pixelBit[4]  = y[1];
2689                 pixelBit[5]  = z[0];
2690                 pixelBit[6]  = z[1];
2691                 pixelBit[7]  = z[2];
2692                 pixelBit[8]  = y[2];
2693                 break;
2694             case 2:
2695                 pixelBit[0]  = x[0];
2696                 pixelBit[1]  = x[1];
2697                 pixelBit[2]  = y[0];
2698                 pixelBit[3]  = y[1];
2699                 pixelBit[4]  = z[0];
2700                 pixelBit[5]  = z[1];
2701                 pixelBit[6]  = y[2];
2702                 pixelBit[7]  = x[2];
2703                 break;
2704             case 3:
2705                 pixelBit[0]  = x[0];
2706                 pixelBit[1]  = y[0];
2707                 pixelBit[2]  = y[1];
2708                 pixelBit[3]  = z[0];
2709                 pixelBit[4]  = z[1];
2710                 pixelBit[5]  = x[1];
2711                 pixelBit[6]  = x[2];
2712                 break;
2713             case 4:
2714                 pixelBit[0]  = y[0];
2715                 pixelBit[1]  = y[1];
2716                 pixelBit[2]  = z[0];
2717                 pixelBit[3]  = z[1];
2718                 pixelBit[4]  = x[0];
2719                 pixelBit[5]  = x[1];
2720                 break;
2721             default:
2722                 ADDR_ASSERT_ALWAYS();
2723                 ret = ADDR_INVALIDPARAMS;
2724                 break;
2725         }
2726     }
2727     else
2728     {
2729         ADDR_ASSERT_ALWAYS();
2730         ret = ADDR_INVALIDPARAMS;
2731     }
2732 
2733     if (ret == ADDR_OK)
2734     {
2735         Dim3d microBlockDim = Block1K_3d[elementBytesLog2];
2736         UINT_32 xIdx = Log2(microBlockDim.w);
2737         UINT_32 yIdx = Log2(microBlockDim.h);
2738         UINT_32 zIdx = Log2(microBlockDim.d);
2739 
2740         pixelBit = pEquation->addr;
2741 
2742         const UINT_32 lowBits = 10;
2743         ADDR_ASSERT(pEquation->addr[lowBits - 1].valid == 1);
2744         ADDR_ASSERT(pEquation->addr[lowBits].valid == 0);
2745 
2746         for (UINT_32 i = lowBits; i < blockSizeLog2; i++)
2747         {
2748             if ((i % 3) == 0)
2749             {
2750                 pixelBit[i] = x[xIdx++];
2751             }
2752             else if ((i % 3) == 1)
2753             {
2754                 pixelBit[i] = z[zIdx++];
2755             }
2756             else
2757             {
2758                 pixelBit[i] = y[yIdx++];
2759             }
2760         }
2761 
2762         for (UINT_32 i = blockSizeLog2; i < maxXorBits; i++)
2763         {
2764             if ((i % 3) == 0)
2765             {
2766                 xorExtra[i - blockSizeLog2] = x[xIdx++];
2767             }
2768             else if ((i % 3) == 1)
2769             {
2770                 xorExtra[i - blockSizeLog2] = z[zIdx++];
2771             }
2772             else
2773             {
2774                 xorExtra[i - blockSizeLog2] = y[yIdx++];
2775             }
2776         }
2777 
2778         if (IsXor(swMode))
2779         {
2780             // Fill XOR bits
2781             UINT_32 pipeStart = m_pipeInterleaveLog2;
2782             UINT_32 pipeXorBits = GetPipeXorBits(blockSizeLog2);
2783             for (UINT_32 i = 0; i < pipeXorBits; i++)
2784             {
2785                 UINT_32               xor1BitPos = pipeStart + (3 * pipeXorBits) - 1 - (2 * i);
2786                 ADDR_CHANNEL_SETTING* pXor1Src   = (xor1BitPos < blockSizeLog2) ?
2787                                                    &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
2788 
2789                 InitChannel(&pEquation->xor1[pipeStart + i], pXor1Src);
2790 
2791                 UINT_32               xor2BitPos = pipeStart + (3 * pipeXorBits) - 2 - (2 * i);
2792                 ADDR_CHANNEL_SETTING* pXor2Src   = (xor2BitPos < blockSizeLog2) ?
2793                                                    &pEquation->addr[xor2BitPos] : &xorExtra[xor2BitPos - blockSizeLog2];
2794 
2795                 InitChannel(&pEquation->xor2[pipeStart + i], pXor2Src);
2796             }
2797 
2798             UINT_32 bankStart = pipeStart + pipeXorBits;
2799             UINT_32 bankXorBits = GetBankXorBits(blockSizeLog2);
2800             for (UINT_32 i = 0; i < bankXorBits; i++)
2801             {
2802                 UINT_32               xor1BitPos = bankStart + (3 * bankXorBits) - 1 - (2 * i);
2803                 ADDR_CHANNEL_SETTING* pXor1Src   = (xor1BitPos < blockSizeLog2) ?
2804                                                    &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
2805 
2806                 InitChannel(&pEquation->xor1[bankStart + i], pXor1Src);
2807 
2808                 UINT_32               xor2BitPos = bankStart + (3 * bankXorBits) - 2 - (2 * i);
2809                 ADDR_CHANNEL_SETTING* pXor2Src   = (xor2BitPos < blockSizeLog2) ?
2810                                                    &pEquation->addr[xor2BitPos] : &xorExtra[xor2BitPos - blockSizeLog2];
2811 
2812                 InitChannel(&pEquation->xor2[bankStart + i], pXor2Src);
2813             }
2814         }
2815 
2816         pEquation->numBits = blockSizeLog2;
2817     }
2818 
2819     return ret;
2820 }
2821 
2822 /**
2823 ************************************************************************************************************************
2824 *   Gfx9Lib::IsValidDisplaySwizzleMode
2825 *
2826 *   @brief
2827 *       Check if a swizzle mode is supported by display engine
2828 *
2829 *   @return
*       TRUE if the swizzle mode is supported by the display engine
2831 ************************************************************************************************************************
2832 */
IsValidDisplaySwizzleMode(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn) const2833 BOOL_32 Gfx9Lib::IsValidDisplaySwizzleMode(
2834     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
2835 {
2836     BOOL_32 support = FALSE;
2837 
2838     const AddrResourceType resourceType = pIn->resourceType;
2839     (void)resourceType;
2840     const AddrSwizzleMode swizzleMode = pIn->swizzleMode;
2841 
2842     if (m_settings.isDce12)
2843     {
2844         switch (swizzleMode)
2845         {
2846             case ADDR_SW_256B_D:
2847             case ADDR_SW_256B_R:
2848                 support = (pIn->bpp == 32);
2849                 break;
2850 
2851             case ADDR_SW_LINEAR:
2852             case ADDR_SW_4KB_D:
2853             case ADDR_SW_4KB_R:
2854             case ADDR_SW_64KB_D:
2855             case ADDR_SW_64KB_R:
2856             case ADDR_SW_VAR_D:
2857             case ADDR_SW_VAR_R:
2858             case ADDR_SW_4KB_D_X:
2859             case ADDR_SW_4KB_R_X:
2860             case ADDR_SW_64KB_D_X:
2861             case ADDR_SW_64KB_R_X:
2862             case ADDR_SW_VAR_D_X:
2863             case ADDR_SW_VAR_R_X:
2864                 support = (pIn->bpp <= 64);
2865                 break;
2866 
2867             default:
2868                 break;
2869         }
2870     }
2871     else if (m_settings.isDcn1)
2872     {
2873         switch (swizzleMode)
2874         {
2875             case ADDR_SW_4KB_D:
2876             case ADDR_SW_64KB_D:
2877             case ADDR_SW_VAR_D:
2878             case ADDR_SW_64KB_D_T:
2879             case ADDR_SW_4KB_D_X:
2880             case ADDR_SW_64KB_D_X:
2881             case ADDR_SW_VAR_D_X:
2882                 support = (pIn->bpp == 64);
2883                 break;
2884 
2885             case ADDR_SW_LINEAR:
2886             case ADDR_SW_4KB_S:
2887             case ADDR_SW_64KB_S:
2888             case ADDR_SW_VAR_S:
2889             case ADDR_SW_64KB_S_T:
2890             case ADDR_SW_4KB_S_X:
2891             case ADDR_SW_64KB_S_X:
2892             case ADDR_SW_VAR_S_X:
2893                 support = (pIn->bpp <= 64);
2894                 break;
2895 
2896             default:
2897                 break;
2898         }
2899     }
2900     else
2901     {
2902         ADDR_NOT_IMPLEMENTED();
2903     }
2904 
2905     return support;
2906 }
2907 
2908 /**
2909 ************************************************************************************************************************
2910 *   Gfx9Lib::HwlComputePipeBankXor
2911 *
2912 *   @brief
2913 *       Generate a PipeBankXor value to be ORed into bits above pipeInterleaveBits of address
2914 *
2915 *   @return
*       ADDR_E_RETURNCODE; the PipeBankXor value is returned in pOut->pipeBankXor
2917 ************************************************************************************************************************
2918 */
HwlComputePipeBankXor(const ADDR2_COMPUTE_PIPEBANKXOR_INPUT * pIn,ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT * pOut) const2919 ADDR_E_RETURNCODE Gfx9Lib::HwlComputePipeBankXor(
2920     const ADDR2_COMPUTE_PIPEBANKXOR_INPUT* pIn,
2921     ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT*      pOut) const
2922 {
2923     UINT_32 macroBlockBits = GetBlockSizeLog2(pIn->swizzleMode);
2924     UINT_32 pipeBits       = GetPipeXorBits(macroBlockBits);
2925     UINT_32 bankBits       = GetBankXorBits(macroBlockBits);
2926 
2927     UINT_32 pipeXor = 0;
2928     UINT_32 bankXor = 0;
2929 
2930     const UINT_32 bankMask = (1 << bankBits) - 1;
2931     const UINT_32 index    = pIn->surfIndex & bankMask;
2932 
2933     const UINT_32 bpp      = pIn->flags.fmask ?
2934                              GetFmaskBpp(pIn->numSamples, pIn->numFrags) : GetElemLib()->GetBitsPerPixel(pIn->format);
2935     if (bankBits == 4)
2936     {
2937         static const UINT_32 BankXorSmallBpp[] = {0, 7, 4, 3, 8, 15, 12, 11, 1, 6, 5, 2, 9, 14, 13, 10};
2938         static const UINT_32 BankXorLargeBpp[] = {0, 7, 8, 15, 4, 3, 12, 11, 1, 6, 9, 14, 5, 2, 13, 10};
2939 
2940         bankXor = (bpp <= 32) ? BankXorSmallBpp[index] : BankXorLargeBpp[index];
2941     }
2942     else if (bankBits > 0)
2943     {
2944         UINT_32 bankIncrease = (1 << (bankBits - 1)) - 1;
2945         bankIncrease = (bankIncrease == 0) ? 1 : bankIncrease;
2946         bankXor = (index * bankIncrease) & bankMask;
2947     }
2948 
2949     pOut->pipeBankXor = (bankXor << pipeBits) | pipeXor;
2950 
2951     return ADDR_OK;
2952 }
2953 
2954 /**
2955 ************************************************************************************************************************
2956 *   Gfx9Lib::HwlComputeSlicePipeBankXor
2957 *
2958 *   @brief
2959 *       Generate slice PipeBankXor value based on base PipeBankXor value and slice id
2960 *
2961 *   @return
*       ADDR_E_RETURNCODE; the slice PipeBankXor value is returned in pOut->pipeBankXor
2963 ************************************************************************************************************************
2964 */
HwlComputeSlicePipeBankXor(const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT * pIn,ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT * pOut) const2965 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSlicePipeBankXor(
2966     const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT* pIn,
2967     ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT*      pOut) const
2968 {
2969     UINT_32 macroBlockBits = GetBlockSizeLog2(pIn->swizzleMode);
2970     UINT_32 pipeBits       = GetPipeXorBits(macroBlockBits);
2971     UINT_32 bankBits       = GetBankXorBits(macroBlockBits);
2972 
2973     UINT_32 pipeXor        = ReverseBitVector(pIn->slice, pipeBits);
2974     UINT_32 bankXor        = ReverseBitVector(pIn->slice >> pipeBits, bankBits);
2975 
2976     pOut->pipeBankXor = pIn->basePipeBankXor ^ (pipeXor | (bankXor << pipeBits));
2977 
2978     return ADDR_OK;
2979 }
2980 
2981 /**
2982 ************************************************************************************************************************
2983 *   Gfx9Lib::HwlComputeSubResourceOffsetForSwizzlePattern
2984 *
2985 *   @brief
2986 *       Compute sub resource offset to support swizzle pattern
2987 *
2988 *   @return
*       ADDR_E_RETURNCODE; the sub resource offset is returned in pOut->offset
2990 ************************************************************************************************************************
2991 */
HwlComputeSubResourceOffsetForSwizzlePattern(const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT * pIn,ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT * pOut) const2992 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSubResourceOffsetForSwizzlePattern(
2993     const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT* pIn,
2994     ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT*      pOut) const
2995 {
2996     ADDR_ASSERT(IsThin(pIn->resourceType, pIn->swizzleMode));
2997 
2998     UINT_32 macroBlockBits = GetBlockSizeLog2(pIn->swizzleMode);
2999     UINT_32 pipeBits       = GetPipeXorBits(macroBlockBits);
3000     UINT_32 bankBits       = GetBankXorBits(macroBlockBits);
3001     UINT_32 pipeXor        = ReverseBitVector(pIn->slice, pipeBits);
3002     UINT_32 bankXor        = ReverseBitVector(pIn->slice >> pipeBits, bankBits);
3003     UINT_32 pipeBankXor    = ((pipeXor | (bankXor << pipeBits)) ^ (pIn->pipeBankXor)) << m_pipeInterleaveLog2;
3004 
3005     pOut->offset = pIn->slice * pIn->sliceSize +
3006                    pIn->macroBlockOffset +
3007                    (pIn->mipTailOffset ^ pipeBankXor) -
3008                    static_cast<UINT_64>(pipeBankXor);
3009     return ADDR_OK;
3010 }
3011 
3012 /**
3013 ************************************************************************************************************************
3014 *   Gfx9Lib::HwlComputeSurfaceInfoSanityCheck
3015 *
3016 *   @brief
3017 *       Compute surface info sanity check
3018 *
3019 *   @return
*       ADDR_OK if the input passes all checks, ADDR_INVALIDPARAMS otherwise
3021 ************************************************************************************************************************
3022 */
HwlComputeSurfaceInfoSanityCheck(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn) const3023 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceInfoSanityCheck(
3024     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
3025 {
3026     BOOL_32 invalid = FALSE;
3027 
3028     if ((pIn->bpp > 128) || (pIn->width == 0) || (pIn->numFrags > 8) || (pIn->numSamples > 16))
3029     {
3030         invalid = TRUE;
3031     }
3032     else if ((pIn->swizzleMode >= ADDR_SW_MAX_TYPE)    ||
3033              (pIn->resourceType >= ADDR_RSRC_MAX_TYPE))
3034     {
3035         invalid = TRUE;
3036     }
3037 
3038     BOOL_32 mipmap = (pIn->numMipLevels > 1);
3039     BOOL_32 msaa   = (pIn->numFrags > 1);
3040 
3041     ADDR2_SURFACE_FLAGS flags = pIn->flags;
3042     BOOL_32 zbuffer = (flags.depth || flags.stencil);
3043     BOOL_32 color   = flags.color;
3044     BOOL_32 display = flags.display || flags.rotated;
3045 
3046     AddrResourceType rsrcType    = pIn->resourceType;
3047     BOOL_32          tex3d       = IsTex3d(rsrcType);
3048     AddrSwizzleMode  swizzle     = pIn->swizzleMode;
3049     BOOL_32          linear      = IsLinear(swizzle);
3050     BOOL_32          blk256B     = IsBlock256b(swizzle);
3051     BOOL_32          blkVar      = IsBlockVariable(swizzle);
3052     BOOL_32          isNonPrtXor = IsNonPrtXor(swizzle);
3053     BOOL_32          prt         = flags.prt;
3054     BOOL_32          stereo      = flags.qbStereo;
3055 
3056     if (invalid == FALSE)
3057     {
3058         if ((pIn->numFrags > 1) &&
3059             (GetBlockSize(swizzle) < (m_pipeInterleaveBytes * pIn->numFrags)))
3060         {
3061             // MSAA surface must have blk_bytes/pipe_interleave >= num_samples
3062             invalid = TRUE;
3063         }
3064     }
3065 
3066     if (invalid == FALSE)
3067     {
3068         switch (rsrcType)
3069         {
3070             case ADDR_RSRC_TEX_1D:
3071                 invalid = msaa || zbuffer || display || (linear == FALSE) || stereo;
3072                 break;
3073             case ADDR_RSRC_TEX_2D:
3074                 invalid = (msaa && mipmap) || (stereo && msaa) || (stereo && mipmap);
3075                 break;
3076             case ADDR_RSRC_TEX_3D:
3077                 invalid = msaa || zbuffer || display || stereo;
3078                 break;
3079             default:
3080                 invalid = TRUE;
3081                 break;
3082         }
3083     }
3084 
3085     if (invalid == FALSE)
3086     {
3087         if (display)
3088         {
3089             invalid = (IsValidDisplaySwizzleMode(pIn) == FALSE);
3090         }
3091     }
3092 
3093     if (invalid == FALSE)
3094     {
3095         if (linear)
3096         {
3097             invalid = ((ADDR_RSRC_TEX_1D != rsrcType) && prt) ||
3098                       zbuffer || msaa || (pIn->bpp == 0) || ((pIn->bpp % 8) != 0);
3099         }
3100         else
3101         {
3102             if (blk256B || blkVar || isNonPrtXor)
3103             {
3104                 invalid = prt;
3105                 if (blk256B)
3106                 {
3107                     invalid = invalid || zbuffer || tex3d || mipmap || msaa;
3108                 }
3109             }
3110 
3111             if (invalid == FALSE)
3112             {
3113                 if (IsZOrderSwizzle(swizzle))
3114                 {
3115                     invalid = color && msaa;
3116                 }
3117                 else if (IsStandardSwizzle(rsrcType, swizzle))
3118                 {
3119                     invalid = zbuffer;
3120                 }
3121                 else if (IsDisplaySwizzle(rsrcType, swizzle))
3122                 {
3123                     invalid = zbuffer;
3124                 }
3125                 else if (IsRotateSwizzle(swizzle))
3126                 {
3127                     invalid = zbuffer || (pIn->bpp > 64) || tex3d;
3128                 }
3129                 else
3130                 {
3131                     ADDR_ASSERT(!"invalid swizzle mode");
3132                     invalid = TRUE;
3133                 }
3134             }
3135         }
3136     }
3137 
3138     ADDR_ASSERT(invalid == FALSE);
3139 
3140     return invalid ? ADDR_INVALIDPARAMS : ADDR_OK;
3141 }
3142 
3143 /**
3144 ************************************************************************************************************************
3145 *   Gfx9Lib::HwlGetPreferredSurfaceSetting
3146 *
3147 *   @brief
*       Internal function to get suggested surface information for client to use
3149 *
3150 *   @return
3151 *       ADDR_E_RETURNCODE
3152 ************************************************************************************************************************
3153 */
HwlGetPreferredSurfaceSetting(const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT * pIn,ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT * pOut) const3154 ADDR_E_RETURNCODE Gfx9Lib::HwlGetPreferredSurfaceSetting(
3155     const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT* pIn,
3156     ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT*      pOut) const
3157 {
3158     // Macro define resource block type
3159     enum AddrBlockType
3160     {
3161         AddrBlockMicro     = 0, // Resource uses 256B block
3162         AddrBlock4KB       = 1, // Resource uses 4KB block
3163         AddrBlock64KB      = 2, // Resource uses 64KB block
3164         AddrBlockVar       = 3, // Resource uses var blcok
3165         AddrBlockLinear    = 4, // Resource uses linear swizzle mode
3166 
3167         AddrBlockMaxTiledType = AddrBlock64KB + 1,
3168     };
3169 
3170     enum AddrBlockSet
3171     {
3172         AddrBlockSetMicro     = 1 << AddrBlockMicro,
3173         AddrBlockSetMacro4KB  = 1 << AddrBlock4KB,
3174         AddrBlockSetMacro64KB = 1 << AddrBlock64KB,
3175         AddrBlockSetVar       = 1 << AddrBlockVar,
3176         AddrBlockSetLinear    = 1 << AddrBlockLinear,
3177 
3178         AddrBlockSetMacro = AddrBlockSetMacro4KB | AddrBlockSetMacro64KB,
3179     };
3180 
3181     enum AddrSwSet
3182     {
3183         AddrSwSetZ = 1 << ADDR_SW_Z,
3184         AddrSwSetS = 1 << ADDR_SW_S,
3185         AddrSwSetD = 1 << ADDR_SW_D,
3186         AddrSwSetR = 1 << ADDR_SW_R,
3187 
3188         AddrSwSetAll = AddrSwSetZ | AddrSwSetS | AddrSwSetD | AddrSwSetR,
3189     };
3190 
3191     ADDR_E_RETURNCODE returnCode = ADDR_OK;
3192     ElemLib*          pElemLib   = GetElemLib();
3193 
3194     // Set format to INVALID will skip this conversion
3195     UINT_32 expandX = 1;
3196     UINT_32 expandY = 1;
3197     UINT_32 bpp     = pIn->bpp;
3198     UINT_32 width   = pIn->width;
3199     UINT_32 height  = pIn->height;
3200 
3201     if (pIn->format != ADDR_FMT_INVALID)
3202     {
3203         // Don't care for this case
3204         ElemMode elemMode = ADDR_UNCOMPRESSED;
3205 
3206         // Get compression/expansion factors and element mode which indicates compression/expansion
3207         bpp = pElemLib->GetBitsPerPixel(pIn->format,
3208                                         &elemMode,
3209                                         &expandX,
3210                                         &expandY);
3211 
3212         UINT_32 basePitch = 0;
3213         GetElemLib()->AdjustSurfaceInfo(elemMode,
3214                                         expandX,
3215                                         expandY,
3216                                         &bpp,
3217                                         &basePitch,
3218                                         &width,
3219                                         &height);
3220     }
3221 
3222     UINT_32 numSamples   = Max(pIn->numSamples, 1u);
3223     UINT_32 numFrags     = (pIn->numFrags == 0) ? numSamples : pIn->numFrags;
3224     UINT_32 slice        = Max(pIn->numSlices, 1u);
3225     UINT_32 numMipLevels = Max(pIn->numMipLevels, 1u);
3226     UINT_32 minSizeAlign = NextPow2(pIn->minSizeAlign);
3227 
3228     if (pIn->flags.fmask)
3229     {
3230         bpp        = GetFmaskBpp(numSamples, numFrags);
3231         numFrags   = 1;
3232         numSamples = 1;
3233         pOut->resourceType = ADDR_RSRC_TEX_2D;
3234     }
3235     else
3236     {
3237         // The output may get changed for volume(3D) texture resource in future
3238         pOut->resourceType = pIn->resourceType;
3239     }
3240 
3241     if (bpp < 8)
3242     {
3243         ADDR_ASSERT_ALWAYS();
3244 
3245         returnCode = ADDR_INVALIDPARAMS;
3246     }
3247     else if (IsTex1d(pOut->resourceType))
3248     {
3249         pOut->swizzleMode         = ADDR_SW_LINEAR;
3250         pOut->validBlockSet.value = AddrBlockSetLinear;
3251         pOut->canXor              = FALSE;
3252     }
3253     else
3254     {
3255         ADDR2_BLOCK_SET blockSet;
3256         blockSet.value = 0;
3257 
3258         ADDR2_SWTYPE_SET addrPreferredSwSet, addrValidSwSet, clientPreferredSwSet;
3259         addrPreferredSwSet.value = AddrSwSetS;
3260         addrValidSwSet           = addrPreferredSwSet;
3261         clientPreferredSwSet     = pIn->preferredSwSet;
3262 
3263         if (clientPreferredSwSet.value == 0)
3264         {
3265             clientPreferredSwSet.value = AddrSwSetAll;
3266         }
3267 
3268         // prt Xor and non-xor will have less height align requirement for stereo surface
3269         BOOL_32 prtXor          = (pIn->flags.prt || pIn->flags.qbStereo) && (pIn->noXor == FALSE);
3270         BOOL_32 displayResource = FALSE;
3271 
3272         pOut->canXor = (pIn->flags.prt == FALSE) && (pIn->noXor == FALSE);
3273 
3274         // Filter out improper swType and blockSet by HW restriction
3275         if (pIn->flags.fmask || pIn->flags.depth || pIn->flags.stencil)
3276         {
3277             ADDR_ASSERT(IsTex2d(pOut->resourceType));
3278             blockSet.value           = AddrBlockSetMacro;
3279             addrPreferredSwSet.value = AddrSwSetZ;
3280             addrValidSwSet.value     = AddrSwSetZ;
3281 
3282             if (pIn->flags.depth && pIn->flags.texture)
3283             {
3284                 if (((bpp == 16) && (numFrags >= 4)) ||
3285                     ((bpp == 32) && (numFrags >= 2)))
3286                 {
3287                     // When _X/_T swizzle mode was used for MSAA depth texture, TC will get zplane
3288                     // equation from wrong address within memory range a tile covered and use the
3289                     // garbage data for compressed Z reading which finally leads to corruption.
3290                     pOut->canXor = FALSE;
3291                     prtXor       = FALSE;
3292                 }
3293             }
3294         }
3295         else if (ElemLib::IsBlockCompressed(pIn->format))
3296         {
3297             // block compressed formats (BCx, ASTC, ETC2) must be either S or D modes.
3298             // Not sure under what circumstances "_D" would be appropriate as these formats
3299             // are not displayable.
3300             blockSet.value = AddrBlockSetMacro;
3301 
3302             // This isn't to be used as texture and caller doesn't allow macro tiled.
3303             if ((pIn->flags.texture == FALSE) &&
3304                 (pIn->forbiddenBlock.macro4KB && pIn->forbiddenBlock.macro64KB))
3305             {
3306                 blockSet.value |= AddrBlockSetLinear;
3307             }
3308 
3309             addrPreferredSwSet.value = AddrSwSetD;
3310             addrValidSwSet.value     = AddrSwSetS | AddrSwSetD;
3311         }
3312         else if (ElemLib::IsMacroPixelPacked(pIn->format))
3313         {
3314             // macro pixel packed formats (BG_RG, GB_GR) does not support the Z modes.
3315             // Its notclear under what circumstances the D or R modes would be appropriate
3316             // since these formats are not displayable.
3317             blockSet.value  = AddrBlockSetLinear | AddrBlockSetMacro;
3318 
3319             addrPreferredSwSet.value = AddrSwSetS;
3320             addrValidSwSet.value     = AddrSwSetS | AddrSwSetD | AddrSwSetR;
3321         }
3322         else if (IsTex3d(pOut->resourceType))
3323         {
3324             blockSet.value = AddrBlockSetLinear | AddrBlockSetMacro;
3325 
3326             if (pIn->flags.prt)
3327             {
3328                 // PRT cannot use SW_D which gives an unexpected block dimension
3329                 addrPreferredSwSet.value = AddrSwSetZ;
3330                 addrValidSwSet.value     = AddrSwSetZ | AddrSwSetS;
3331             }
3332             else if ((numMipLevels > 1) && (slice >= width) && (slice >= height))
3333             {
3334                 // When depth (Z) is the maximum dimension then must use one of the SW_*_S
3335                 // or SW_*_Z modes if mipmapping is desired on a 3D surface
3336                 addrPreferredSwSet.value = AddrSwSetZ;
3337                 addrValidSwSet.value     = AddrSwSetZ | AddrSwSetS;
3338             }
3339             else if (pIn->flags.color)
3340             {
3341                 addrPreferredSwSet.value = AddrSwSetD;
3342                 addrValidSwSet.value     = AddrSwSetZ | AddrSwSetS | AddrSwSetD;
3343             }
3344             else
3345             {
3346                 addrPreferredSwSet.value = AddrSwSetZ;
3347                 addrValidSwSet.value     = AddrSwSetZ | AddrSwSetD;
3348                 if (bpp != 128)
3349                 {
3350                     addrValidSwSet.value |= AddrSwSetS;
3351                 }
3352             }
3353         }
3354         else
3355         {
3356             addrPreferredSwSet.value = ((pIn->flags.display == TRUE) ||
3357                                         (pIn->flags.overlay == TRUE) ||
3358                                         (pIn->bpp           == 128)) ? AddrSwSetD : AddrSwSetS;
3359 
3360             addrValidSwSet.value     = AddrSwSetS | AddrSwSetD | AddrSwSetR;
3361 
3362             if (numMipLevels > 1)
3363             {
3364                 ADDR_ASSERT(numFrags == 1);
3365                 blockSet.value = AddrBlockSetLinear | AddrBlockSetMacro;
3366             }
3367             else if ((numFrags > 1) || (numSamples > 1))
3368             {
3369                 ADDR_ASSERT(IsTex2d(pOut->resourceType));
3370                 blockSet.value = AddrBlockSetMacro;
3371             }
3372             else
3373             {
3374                 ADDR_ASSERT(IsTex2d(pOut->resourceType));
3375                 blockSet.value = AddrBlockSetLinear | AddrBlockSetMicro | AddrBlockSetMacro;
3376 
3377                 displayResource = pIn->flags.rotated || pIn->flags.display;
3378 
3379                 if (displayResource)
3380                 {
3381                     addrPreferredSwSet.value = pIn->flags.rotated ? AddrSwSetR : AddrSwSetD;
3382 
3383                     if (pIn->bpp > 64)
3384                     {
3385                         blockSet.value = 0;
3386                     }
3387                     else if (m_settings.isDce12)
3388                     {
3389                         if (pIn->bpp != 32)
3390                         {
3391                             blockSet.micro = FALSE;
3392                         }
3393 
3394                         // DCE12 does not support display surface to be _T swizzle mode
3395                         prtXor = FALSE;
3396 
3397                         addrValidSwSet.value = AddrSwSetD | AddrSwSetR;
3398                     }
3399                     else if (m_settings.isDcn1)
3400                     {
3401                         // _R is not supported by Dcn1
3402                         if (pIn->bpp == 64)
3403                         {
3404                             addrPreferredSwSet.value = AddrSwSetD;
3405                             addrValidSwSet.value     = AddrSwSetD;
3406                         }
3407                         else
3408                         {
3409                             addrPreferredSwSet.value = AddrSwSetS;
3410                             addrValidSwSet.value     = AddrSwSetS | AddrSwSetD;
3411                         }
3412 
3413                         blockSet.micro = FALSE;
3414                     }
3415                     else
3416                     {
3417                         ADDR_NOT_IMPLEMENTED();
3418                         returnCode = ADDR_NOTSUPPORTED;
3419                     }
3420                 }
3421             }
3422         }
3423 
3424         ADDR_ASSERT((addrValidSwSet.value & addrPreferredSwSet.value) == addrPreferredSwSet.value);
3425 
3426         pOut->clientPreferredSwSet = clientPreferredSwSet;
3427 
3428         // Clamp client preferred set to valid set
3429         clientPreferredSwSet.value &= addrValidSwSet.value;
3430 
3431         pOut->validSwTypeSet = addrValidSwSet;
3432 
3433         if (clientPreferredSwSet.value == 0)
3434         {
3435             // Client asks for an invalid swizzle type...
3436             ADDR_ASSERT_ALWAYS();
3437             returnCode = ADDR_INVALIDPARAMS;
3438         }
3439         else
3440         {
3441             if (IsPow2(clientPreferredSwSet.value))
3442             {
3443                 // Only one swizzle type left, use it directly
3444                 addrPreferredSwSet.value = clientPreferredSwSet.value;
3445             }
3446             else if ((clientPreferredSwSet.value & addrPreferredSwSet.value) == 0)
3447             {
3448                 // Client wants 2 or more a valid swizzle type but none of them is addrlib preferred
3449                 if (clientPreferredSwSet.sw_D)
3450                 {
3451                     addrPreferredSwSet.value = AddrSwSetD;
3452                 }
3453                 else if (clientPreferredSwSet.sw_Z)
3454                 {
3455                     addrPreferredSwSet.value = AddrSwSetZ;
3456                 }
3457                 else if (clientPreferredSwSet.sw_R)
3458                 {
3459                     addrPreferredSwSet.value = AddrSwSetR;
3460                 }
3461                 else
3462                 {
3463                     ADDR_ASSERT(clientPreferredSwSet.sw_S);
3464                     addrPreferredSwSet.value = AddrSwSetS;
3465                 }
3466             }
3467 
3468             if ((numFrags > 1) &&
3469                 (GetBlockSize(ADDR_SW_4KB) < (m_pipeInterleaveBytes * numFrags)))
3470             {
3471                 // MSAA surface must have blk_bytes/pipe_interleave >= num_samples
3472                 blockSet.macro4KB = FALSE;
3473             }
3474 
3475             if (pIn->flags.prt)
3476             {
3477                 blockSet.value &= AddrBlockSetMacro64KB;
3478             }
3479 
3480             // Apply customized forbidden setting
3481             blockSet.value &= ~pIn->forbiddenBlock.value;
3482 
3483             if (pIn->maxAlign > 0)
3484             {
3485                 if (pIn->maxAlign < GetBlockSize(ADDR_SW_64KB))
3486                 {
3487                     blockSet.macro64KB = FALSE;
3488                 }
3489 
3490                 if (pIn->maxAlign < GetBlockSize(ADDR_SW_4KB))
3491                 {
3492                     blockSet.macro4KB = FALSE;
3493                 }
3494 
3495                 if (pIn->maxAlign < GetBlockSize(ADDR_SW_256B))
3496                 {
3497                     blockSet.micro = FALSE;
3498                 }
3499             }
3500 
3501             Dim3d blkAlign[AddrBlockMaxTiledType]  = {{0}, {0}, {0}};
3502             Dim3d paddedDim[AddrBlockMaxTiledType] = {{0}, {0}, {0}};
3503             UINT_64 padSize[AddrBlockMaxTiledType] = {0};
3504 
3505             if (blockSet.micro)
3506             {
3507                 returnCode = ComputeBlockDimensionForSurf(&blkAlign[AddrBlockMicro].w,
3508                                                           &blkAlign[AddrBlockMicro].h,
3509                                                           &blkAlign[AddrBlockMicro].d,
3510                                                           bpp,
3511                                                           numFrags,
3512                                                           pOut->resourceType,
3513                                                           ADDR_SW_256B);
3514 
3515                 if (returnCode == ADDR_OK)
3516                 {
3517                     if (displayResource)
3518                     {
3519                         blkAlign[AddrBlockMicro].w = PowTwoAlign(blkAlign[AddrBlockMicro].w, 32);
3520                     }
3521                     else if ((blkAlign[AddrBlockMicro].w >= width) && (blkAlign[AddrBlockMicro].h >= height) &&
3522                              (minSizeAlign <= GetBlockSize(ADDR_SW_256B)))
3523                     {
3524                         // If one 256B block can contain the surface, don't bother bigger block type
3525                         blockSet.macro4KB = FALSE;
3526                         blockSet.macro64KB = FALSE;
3527                         blockSet.var = FALSE;
3528                     }
3529 
3530                     padSize[AddrBlockMicro] = ComputePadSize(&blkAlign[AddrBlockMicro], width, height,
3531                                                              slice, &paddedDim[AddrBlockMicro]);
3532                 }
3533             }
3534 
3535             if ((returnCode == ADDR_OK) && blockSet.macro4KB)
3536             {
3537                 returnCode = ComputeBlockDimensionForSurf(&blkAlign[AddrBlock4KB].w,
3538                                                           &blkAlign[AddrBlock4KB].h,
3539                                                           &blkAlign[AddrBlock4KB].d,
3540                                                           bpp,
3541                                                           numFrags,
3542                                                           pOut->resourceType,
3543                                                           ADDR_SW_4KB);
3544 
3545                 if (returnCode == ADDR_OK)
3546                 {
3547                     if (displayResource)
3548                     {
3549                         blkAlign[AddrBlock4KB].w = PowTwoAlign(blkAlign[AddrBlock4KB].w, 32);
3550                     }
3551 
3552                     padSize[AddrBlock4KB] = ComputePadSize(&blkAlign[AddrBlock4KB], width, height,
3553                                                            slice, &paddedDim[AddrBlock4KB]);
3554 
3555                     ADDR_ASSERT(padSize[AddrBlock4KB] >= padSize[AddrBlockMicro]);
3556                 }
3557             }
3558 
3559             if ((returnCode == ADDR_OK) && blockSet.macro64KB)
3560             {
3561                 returnCode = ComputeBlockDimensionForSurf(&blkAlign[AddrBlock64KB].w,
3562                                                           &blkAlign[AddrBlock64KB].h,
3563                                                           &blkAlign[AddrBlock64KB].d,
3564                                                           bpp,
3565                                                           numFrags,
3566                                                           pOut->resourceType,
3567                                                           ADDR_SW_64KB);
3568 
3569                 if (returnCode == ADDR_OK)
3570                 {
3571                     if (displayResource)
3572                     {
3573                         blkAlign[AddrBlock64KB].w = PowTwoAlign(blkAlign[AddrBlock64KB].w, 32);
3574                     }
3575 
3576                     padSize[AddrBlock64KB] = ComputePadSize(&blkAlign[AddrBlock64KB], width, height,
3577                                                             slice, &paddedDim[AddrBlock64KB]);
3578 
3579                     ADDR_ASSERT(padSize[AddrBlock64KB] >= padSize[AddrBlock4KB]);
3580                     ADDR_ASSERT(padSize[AddrBlock64KB] >= padSize[AddrBlockMicro]);
3581                 }
3582             }
3583 
3584             if (returnCode == ADDR_OK)
3585             {
3586                 UINT_64 minSizeAlignInElement = Max(minSizeAlign / (bpp >> 3), 1u);
3587 
3588                 for (UINT_32 i = AddrBlockMicro; i < AddrBlockMaxTiledType; i++)
3589                 {
3590                     padSize[i] = PowTwoAlign(padSize[i], minSizeAlignInElement);
3591                 }
3592 
3593                 // Use minimum block type which meets all conditions above if flag minimizeAlign was set
3594                 if (pIn->flags.minimizeAlign)
3595                 {
3596                     // If padded size of 64KB block is larger than padded size of 256B block or 4KB
3597                     // block, filter out 64KB block from candidate list
3598                     if (blockSet.macro64KB &&
3599                         ((blockSet.micro && (padSize[AddrBlockMicro] < padSize[AddrBlock64KB])) ||
3600                          (blockSet.macro4KB && (padSize[AddrBlock4KB] < padSize[AddrBlock64KB]))))
3601                     {
3602                         blockSet.macro64KB = FALSE;
3603                     }
3604 
3605                     // If padded size of 4KB block is larger than padded size of 256B block,
3606                     // filter out 4KB block from candidate list
3607                     if (blockSet.macro4KB &&
3608                         blockSet.micro &&
3609                         (padSize[AddrBlockMicro] < padSize[AddrBlock4KB]))
3610                     {
3611                         blockSet.macro4KB = FALSE;
3612                     }
3613                 }
3614                 // Filter out 64KB/4KB block if a smaller block type has 2/3 or less memory footprint
3615                 else if (pIn->flags.opt4space)
3616                 {
3617                     UINT_64 threshold = blockSet.micro ? padSize[AddrBlockMicro] :
3618                                         (blockSet.macro4KB ? padSize[AddrBlock4KB] : padSize[AddrBlock64KB]);
3619 
3620                     threshold += threshold >> 1;
3621 
3622                     if (blockSet.macro64KB && (padSize[AddrBlock64KB] > threshold))
3623                     {
3624                         blockSet.macro64KB = FALSE;
3625                     }
3626 
3627                     if (blockSet.macro4KB && (padSize[AddrBlock4KB] > threshold))
3628                     {
3629                         blockSet.macro4KB = FALSE;
3630                     }
3631                 }
3632                 else
3633                 {
3634                     if (blockSet.macro64KB &&
3635                         (padSize[AddrBlock64KB] >= static_cast<UINT_64>(width) * height * slice * 2) &&
3636                         ((blockSet.value & ~AddrBlockSetMacro64KB) != 0))
3637                     {
3638                         // If 64KB block waste more than half memory on padding, filter it out from
3639                         // candidate list when it is not the only choice left
3640                         blockSet.macro64KB = FALSE;
3641                     }
3642                 }
3643 
3644                 if (blockSet.value == 0)
3645                 {
3646                     // Bad things happen, client will not get any useful information from AddrLib.
3647                     // Maybe we should fill in some output earlier instead of outputing nothing?
3648                     ADDR_ASSERT_ALWAYS();
3649                     returnCode = ADDR_INVALIDPARAMS;
3650                 }
3651                 else
3652                 {
3653                     pOut->validBlockSet = blockSet;
3654                     pOut->canXor = pOut->canXor &&
3655                                    (blockSet.macro4KB || blockSet.macro64KB || blockSet.var);
3656 
3657                     if (blockSet.macro64KB || blockSet.macro4KB)
3658                     {
3659                         if (addrPreferredSwSet.value == AddrSwSetZ)
3660                         {
3661                             pOut->swizzleMode = blockSet.macro64KB ? ADDR_SW_64KB_Z : ADDR_SW_4KB_Z;
3662                         }
3663                         else if (addrPreferredSwSet.value == AddrSwSetS)
3664                         {
3665                             pOut->swizzleMode = blockSet.macro64KB ? ADDR_SW_64KB_S : ADDR_SW_4KB_S;
3666                         }
3667                         else if (addrPreferredSwSet.value == AddrSwSetD)
3668                         {
3669                             pOut->swizzleMode = blockSet.macro64KB ? ADDR_SW_64KB_D : ADDR_SW_4KB_D;
3670                         }
3671                         else
3672                         {
3673                             ADDR_ASSERT(addrPreferredSwSet.value == AddrSwSetR);
3674                             pOut->swizzleMode = blockSet.macro64KB ? ADDR_SW_64KB_R : ADDR_SW_4KB_R;
3675                         }
3676 
3677                         if (prtXor && blockSet.macro64KB)
3678                         {
3679                             // Client wants PRTXOR, give back _T swizzle mode if 64KB is available
3680                             const UINT_32 prtGap = ADDR_SW_64KB_Z_T - ADDR_SW_64KB_Z;
3681                             pOut->swizzleMode = static_cast<AddrSwizzleMode>(pOut->swizzleMode + prtGap);
3682                         }
3683                         else if (pOut->canXor)
3684                         {
3685                             // Client wants XOR and this is allowed, return XOR version swizzle mode
3686                             const UINT_32 xorGap = ADDR_SW_4KB_Z_X - ADDR_SW_4KB_Z;
3687                             pOut->swizzleMode = static_cast<AddrSwizzleMode>(pOut->swizzleMode + xorGap);
3688                         }
3689                     }
3690                     else if (blockSet.micro)
3691                     {
3692                         if (addrPreferredSwSet.value == AddrSwSetS)
3693                         {
3694                             pOut->swizzleMode = ADDR_SW_256B_S;
3695                         }
3696                         else if (addrPreferredSwSet.value == AddrSwSetD)
3697                         {
3698                             pOut->swizzleMode = ADDR_SW_256B_D;
3699                         }
3700                         else
3701                         {
3702                             ADDR_ASSERT(addrPreferredSwSet.value == AddrSwSetR);
3703                             pOut->swizzleMode = ADDR_SW_256B_R;
3704                         }
3705                     }
3706                     else if (blockSet.linear)
3707                     {
3708                         // Fall into this branch doesn't mean linear is suitable, only no other choices!
3709                         pOut->swizzleMode = ADDR_SW_LINEAR;
3710                     }
3711                     else
3712                     {
3713                         ADDR_ASSERT(blockSet.var);
3714 
3715                         // Designer consider VAR swizzle mode is usless for most cases
3716                         ADDR_UNHANDLED_CASE();
3717 
3718                         returnCode = ADDR_NOTSUPPORTED;
3719                     }
3720 
3721 #if DEBUG
3722                     // Post sanity check, at least AddrLib should accept the output generated by its own
3723                     if (pOut->swizzleMode != ADDR_SW_LINEAR)
3724                     {
3725                         ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {0};
3726                         localIn.flags = pIn->flags;
3727                         localIn.swizzleMode = pOut->swizzleMode;
3728                         localIn.resourceType = pOut->resourceType;
3729                         localIn.format = pIn->format;
3730                         localIn.bpp = bpp;
3731                         localIn.width = width;
3732                         localIn.height = height;
3733                         localIn.numSlices = slice;
3734                         localIn.numMipLevels = numMipLevels;
3735                         localIn.numSamples = numSamples;
3736                         localIn.numFrags = numFrags;
3737 
3738                         HwlComputeSurfaceInfoSanityCheck(&localIn);
3739 
3740                     }
3741 #endif
3742                 }
3743             }
3744         }
3745     }
3746 
3747     return returnCode;
3748 }
3749 
3750 /**
3751 ************************************************************************************************************************
3752 *   Gfx9Lib::ComputeStereoInfo
3753 *
3754 *   @brief
3755 *       Compute height alignment and right eye pipeBankXor for stereo surface
3756 *
3757 *   @return
3758 *       Error code
3759 *
3760 ************************************************************************************************************************
3761 */
ComputeStereoInfo(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,ADDR2_COMPUTE_SURFACE_INFO_OUTPUT * pOut,UINT_32 * pHeightAlign) const3762 ADDR_E_RETURNCODE Gfx9Lib::ComputeStereoInfo(
3763     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,
3764     ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut,
3765     UINT_32*                                pHeightAlign
3766     ) const
3767 {
3768     ADDR_E_RETURNCODE returnCode = ADDR_OK;
3769 
3770     UINT_32 eqIndex = HwlGetEquationIndex(pIn, pOut);
3771 
3772     if (eqIndex < m_numEquations)
3773     {
3774         if (IsXor(pIn->swizzleMode))
3775         {
3776             const UINT_32        blkSizeLog2       = GetBlockSizeLog2(pIn->swizzleMode);
3777             const UINT_32        numPipeBits       = GetPipeXorBits(blkSizeLog2);
3778             const UINT_32        numBankBits       = GetBankXorBits(blkSizeLog2);
3779             const UINT_32        bppLog2           = Log2(pIn->bpp >> 3);
3780             const UINT_32        maxYCoordBlock256 = Log2(Block256_2d[bppLog2].h) - 1;
3781             const ADDR_EQUATION *pEqToCheck        = &m_equationTable[eqIndex];
3782 
3783             ADDR_ASSERT(maxYCoordBlock256 ==
3784                         GetMaxValidChannelIndex(&pEqToCheck->addr[0], GetBlockSizeLog2(ADDR_SW_256B), 1));
3785 
3786             const UINT_32 maxYCoordInBaseEquation =
3787                 (blkSizeLog2 - GetBlockSizeLog2(ADDR_SW_256B)) / 2 + maxYCoordBlock256;
3788 
3789             ADDR_ASSERT(maxYCoordInBaseEquation ==
3790                         GetMaxValidChannelIndex(&pEqToCheck->addr[0], blkSizeLog2, 1));
3791 
3792             const UINT_32 maxYCoordInPipeXor = (numPipeBits == 0) ? 0 : maxYCoordBlock256 + numPipeBits;
3793 
3794             ADDR_ASSERT(maxYCoordInPipeXor ==
3795                         GetMaxValidChannelIndex(&pEqToCheck->xor1[m_pipeInterleaveLog2], numPipeBits, 1));
3796 
3797             const UINT_32 maxYCoordInBankXor = (numBankBits == 0) ?
3798                                                0 : maxYCoordBlock256 + (numPipeBits + 1) / 2 + numBankBits;
3799 
3800             ADDR_ASSERT(maxYCoordInBankXor ==
3801                         GetMaxValidChannelIndex(&pEqToCheck->xor1[m_pipeInterleaveLog2 + numPipeBits], numBankBits, 1));
3802 
3803             const UINT_32 maxYCoordInPipeBankXor = Max(maxYCoordInPipeXor, maxYCoordInBankXor);
3804 
3805             if (maxYCoordInPipeBankXor > maxYCoordInBaseEquation)
3806             {
3807                 *pHeightAlign = 1u << maxYCoordInPipeBankXor;
3808 
3809                 if (pOut->pStereoInfo != NULL)
3810                 {
3811                     pOut->pStereoInfo->rightSwizzle = 0;
3812 
3813                     if ((PowTwoAlign(pIn->height, *pHeightAlign) % (*pHeightAlign * 2)) != 0)
3814                     {
3815                         if (maxYCoordInPipeXor == maxYCoordInPipeBankXor)
3816                         {
3817                             pOut->pStereoInfo->rightSwizzle |= (1u << 1);
3818                         }
3819 
3820                         if (maxYCoordInBankXor == maxYCoordInPipeBankXor)
3821                         {
3822                             pOut->pStereoInfo->rightSwizzle |=
3823                                 1u << ((numPipeBits % 2) ? numPipeBits : numPipeBits + 1);
3824                         }
3825 
3826                         ADDR_ASSERT(pOut->pStereoInfo->rightSwizzle ==
3827                                     GetCoordActiveMask(&pEqToCheck->xor1[m_pipeInterleaveLog2],
3828                                                        numPipeBits + numBankBits, 1, maxYCoordInPipeBankXor));
3829                     }
3830                 }
3831             }
3832         }
3833     }
3834     else
3835     {
3836         ADDR_ASSERT_ALWAYS();
3837         returnCode = ADDR_ERROR;
3838     }
3839 
3840     return returnCode;
3841 }
3842 
3843 /**
3844 ************************************************************************************************************************
3845 *   Gfx9Lib::HwlComputeSurfaceInfoTiled
3846 *
3847 *   @brief
3848 *       Internal function to calculate alignment for tiled surface
3849 *
3850 *   @return
3851 *       ADDR_E_RETURNCODE
3852 ************************************************************************************************************************
3853 */
HwlComputeSurfaceInfoTiled(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,ADDR2_COMPUTE_SURFACE_INFO_OUTPUT * pOut) const3854 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceInfoTiled(
3855      const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,    ///< [in] input structure
3856      ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut    ///< [out] output structure
3857      ) const
3858 {
3859     ADDR_E_RETURNCODE returnCode = ComputeBlockDimensionForSurf(&pOut->blockWidth,
3860                                                                 &pOut->blockHeight,
3861                                                                 &pOut->blockSlices,
3862                                                                 pIn->bpp,
3863                                                                 pIn->numFrags,
3864                                                                 pIn->resourceType,
3865                                                                 pIn->swizzleMode);
3866 
3867     if (returnCode == ADDR_OK)
3868     {
3869         UINT_32 pitchAlignInElement = pOut->blockWidth;
3870 
3871         if ((IsTex2d(pIn->resourceType) == TRUE) &&
3872             (pIn->flags.display || pIn->flags.rotated) &&
3873             (pIn->numMipLevels <= 1) &&
3874             (pIn->numSamples <= 1) &&
3875             (pIn->numFrags <= 1))
3876         {
3877             // Display engine needs pitch align to be at least 32 pixels.
3878             pitchAlignInElement = PowTwoAlign(pitchAlignInElement, 32);
3879         }
3880 
3881         pOut->pitch = PowTwoAlign(pIn->width, pitchAlignInElement);
3882 
3883         if ((pIn->numMipLevels <= 1) && (pIn->pitchInElement > 0))
3884         {
3885             if ((pIn->pitchInElement % pitchAlignInElement) != 0)
3886             {
3887                 returnCode = ADDR_INVALIDPARAMS;
3888             }
3889             else if (pIn->pitchInElement < pOut->pitch)
3890             {
3891                 returnCode = ADDR_INVALIDPARAMS;
3892             }
3893             else
3894             {
3895                 pOut->pitch = pIn->pitchInElement;
3896             }
3897         }
3898 
3899         UINT_32 heightAlign = 0;
3900 
3901         if (pIn->flags.qbStereo)
3902         {
3903             returnCode = ComputeStereoInfo(pIn, pOut, &heightAlign);
3904         }
3905 
3906         if (returnCode == ADDR_OK)
3907         {
3908             pOut->height = PowTwoAlign(pIn->height, pOut->blockHeight);
3909 
3910             if (heightAlign > 1)
3911             {
3912                 pOut->height = PowTwoAlign(pOut->height, heightAlign);
3913             }
3914 
3915             pOut->numSlices = PowTwoAlign(pIn->numSlices, pOut->blockSlices);
3916 
3917             pOut->epitchIsHeight   = FALSE;
3918             pOut->mipChainInTail   = FALSE;
3919             pOut->firstMipIdInTail = pIn->numMipLevels;
3920 
3921             pOut->mipChainPitch    = pOut->pitch;
3922             pOut->mipChainHeight   = pOut->height;
3923             pOut->mipChainSlice    = pOut->numSlices;
3924 
3925             if (pIn->numMipLevels > 1)
3926             {
3927                 pOut->firstMipIdInTail = GetMipChainInfo(pIn->resourceType,
3928                                                          pIn->swizzleMode,
3929                                                          pIn->bpp,
3930                                                          pIn->width,
3931                                                          pIn->height,
3932                                                          pIn->numSlices,
3933                                                          pOut->blockWidth,
3934                                                          pOut->blockHeight,
3935                                                          pOut->blockSlices,
3936                                                          pIn->numMipLevels,
3937                                                          pOut->pMipInfo);
3938 
3939                 const UINT_32 endingMipId = Min(pOut->firstMipIdInTail, pIn->numMipLevels - 1);
3940 
3941                 if (endingMipId == 0)
3942                 {
3943                     const Dim3d tailMaxDim = GetMipTailDim(pIn->resourceType,
3944                                                            pIn->swizzleMode,
3945                                                            pOut->blockWidth,
3946                                                            pOut->blockHeight,
3947                                                            pOut->blockSlices);
3948 
3949                     pOut->epitchIsHeight = TRUE;
3950                     pOut->pitch          = tailMaxDim.w;
3951                     pOut->height         = tailMaxDim.h;
3952                     pOut->numSlices      = IsThick(pIn->resourceType, pIn->swizzleMode) ?
3953                                            tailMaxDim.d : pIn->numSlices;
3954                     pOut->mipChainInTail = TRUE;
3955                 }
3956                 else
3957                 {
3958                     UINT_32 mip0WidthInBlk  = pOut->pitch  / pOut->blockWidth;
3959                     UINT_32 mip0HeightInBlk = pOut->height / pOut->blockHeight;
3960 
3961                     AddrMajorMode majorMode = GetMajorMode(pIn->resourceType,
3962                                                            pIn->swizzleMode,
3963                                                            mip0WidthInBlk,
3964                                                            mip0HeightInBlk,
3965                                                            pOut->numSlices / pOut->blockSlices);
3966                     if (majorMode == ADDR_MAJOR_Y)
3967                     {
3968                         UINT_32 mip1WidthInBlk = RoundHalf(mip0WidthInBlk);
3969 
3970                         if ((mip1WidthInBlk == 1) && (endingMipId > 2))
3971                         {
3972                             mip1WidthInBlk++;
3973                         }
3974 
3975                         pOut->mipChainPitch += (mip1WidthInBlk * pOut->blockWidth);
3976 
3977                         pOut->epitchIsHeight = FALSE;
3978                     }
3979                     else
3980                     {
3981                         UINT_32 mip1HeightInBlk = RoundHalf(mip0HeightInBlk);
3982 
3983                         if ((mip1HeightInBlk == 1) && (endingMipId > 2))
3984                         {
3985                             mip1HeightInBlk++;
3986                         }
3987 
3988                         pOut->mipChainHeight += (mip1HeightInBlk * pOut->blockHeight);
3989 
3990                         pOut->epitchIsHeight = TRUE;
3991                     }
3992                 }
3993 
3994                 if (pOut->pMipInfo != NULL)
3995                 {
3996                     UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3);
3997 
3998                     for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
3999                     {
4000                         Dim3d   mipStartPos          = {0};
4001                         UINT_32 mipTailOffsetInBytes = 0;
4002 
4003                         mipStartPos = GetMipStartPos(pIn->resourceType,
4004                                                      pIn->swizzleMode,
4005                                                      pOut->pitch,
4006                                                      pOut->height,
4007                                                      pOut->numSlices,
4008                                                      pOut->blockWidth,
4009                                                      pOut->blockHeight,
4010                                                      pOut->blockSlices,
4011                                                      i,
4012                                                      elementBytesLog2,
4013                                                      &mipTailOffsetInBytes);
4014 
4015                         UINT_32 pitchInBlock     =
4016                             pOut->mipChainPitch / pOut->blockWidth;
4017                         UINT_32 sliceInBlock     =
4018                             (pOut->mipChainHeight / pOut->blockHeight) * pitchInBlock;
4019                         UINT_64 blockIndex       =
4020                             mipStartPos.d * sliceInBlock + mipStartPos.h * pitchInBlock + mipStartPos.w;
4021                         UINT_64 macroBlockOffset =
4022                             blockIndex << GetBlockSizeLog2(pIn->swizzleMode);
4023 
4024                         pOut->pMipInfo[i].macroBlockOffset = macroBlockOffset;
4025                         pOut->pMipInfo[i].mipTailOffset    = mipTailOffsetInBytes;
4026                     }
4027                 }
4028             }
4029             else if (pOut->pMipInfo != NULL)
4030             {
4031                 pOut->pMipInfo[0].pitch  = pOut->pitch;
4032                 pOut->pMipInfo[0].height = pOut->height;
4033                 pOut->pMipInfo[0].depth  = IsTex3d(pIn->resourceType)? pOut->numSlices : 1;
4034                 pOut->pMipInfo[0].offset = 0;
4035             }
4036 
4037             pOut->sliceSize = static_cast<UINT_64>(pOut->mipChainPitch) * pOut->mipChainHeight *
4038                               (pIn->bpp >> 3) * pIn->numFrags;
4039             pOut->surfSize  = pOut->sliceSize * pOut->mipChainSlice;
4040             pOut->baseAlign = HwlComputeSurfaceBaseAlign(pIn->swizzleMode);
4041 
4042             if (pIn->flags.prt)
4043             {
4044                 pOut->baseAlign = Max(pOut->baseAlign, PrtAlignment);
4045             }
4046         }
4047     }
4048 
4049     return returnCode;
4050 }
4051 
4052 /**
4053 ************************************************************************************************************************
4054 *   Gfx9Lib::HwlComputeSurfaceInfoLinear
4055 *
4056 *   @brief
4057 *       Internal function to calculate alignment for linear surface
4058 *
4059 *   @return
4060 *       ADDR_E_RETURNCODE
4061 ************************************************************************************************************************
4062 */
HwlComputeSurfaceInfoLinear(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,ADDR2_COMPUTE_SURFACE_INFO_OUTPUT * pOut) const4063 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceInfoLinear(
4064      const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,    ///< [in] input structure
4065      ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut    ///< [out] output structure
4066      ) const
4067 {
4068     ADDR_E_RETURNCODE returnCode   = ADDR_OK;
4069     UINT_32           pitch        = 0;
4070     UINT_32           actualHeight = 0;
4071     UINT_32           elementBytes = pIn->bpp >> 3;
4072     const UINT_32     alignment    = pIn->flags.prt ? PrtAlignment : 256;
4073 
4074     if (IsTex1d(pIn->resourceType))
4075     {
4076         if (pIn->height > 1)
4077         {
4078             returnCode = ADDR_INVALIDPARAMS;
4079         }
4080         else
4081         {
4082             const UINT_32 pitchAlignInElement = alignment / elementBytes;
4083 
4084             pitch        = PowTwoAlign(pIn->width, pitchAlignInElement);
4085             actualHeight = pIn->numMipLevels;
4086 
4087             if (pIn->flags.prt == FALSE)
4088             {
4089                 returnCode = ApplyCustomizedPitchHeight(pIn, elementBytes, pitchAlignInElement,
4090                                                         &pitch, &actualHeight);
4091             }
4092 
4093             if (returnCode == ADDR_OK)
4094             {
4095                 if (pOut->pMipInfo != NULL)
4096                 {
4097                     for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
4098                     {
4099                         pOut->pMipInfo[i].offset = pitch * elementBytes * i;
4100                         pOut->pMipInfo[i].pitch  = pitch;
4101                         pOut->pMipInfo[i].height = 1;
4102                         pOut->pMipInfo[i].depth  = 1;
4103                     }
4104                 }
4105             }
4106         }
4107     }
4108     else
4109     {
4110         returnCode = ComputeSurfaceLinearPadding(pIn, &pitch, &actualHeight, pOut->pMipInfo);
4111     }
4112 
4113     if ((pitch == 0) || (actualHeight == 0))
4114     {
4115         returnCode = ADDR_INVALIDPARAMS;
4116     }
4117 
4118     if (returnCode == ADDR_OK)
4119     {
4120         pOut->pitch          = pitch;
4121         pOut->height         = pIn->height;
4122         pOut->numSlices      = pIn->numSlices;
4123         pOut->mipChainPitch  = pitch;
4124         pOut->mipChainHeight = actualHeight;
4125         pOut->mipChainSlice  = pOut->numSlices;
4126         pOut->epitchIsHeight = (pIn->numMipLevels > 1) ? TRUE : FALSE;
4127         pOut->sliceSize      = static_cast<UINT_64>(pOut->pitch) * actualHeight * elementBytes;
4128         pOut->surfSize       = pOut->sliceSize * pOut->numSlices;
4129         pOut->baseAlign      = (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) ? (pIn->bpp / 8) : alignment;
4130         pOut->blockWidth     = (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) ? 1 : (256 / elementBytes);
4131         pOut->blockHeight    = 1;
4132         pOut->blockSlices    = 1;
4133     }
4134 
4135     // Post calculation validate
4136     ADDR_ASSERT(pOut->sliceSize > 0);
4137 
4138     return returnCode;
4139 }
4140 
4141 /**
4142 ************************************************************************************************************************
4143 *   Gfx9Lib::GetMipChainInfo
4144 *
4145 *   @brief
4146 *       Internal function to get out information about mip chain
4147 *
4148 *   @return
4149 *       Smaller value between Id of first mip fitted in mip tail and max Id of mip being created
4150 ************************************************************************************************************************
4151 */
GetMipChainInfo(AddrResourceType resourceType,AddrSwizzleMode swizzleMode,UINT_32 bpp,UINT_32 mip0Width,UINT_32 mip0Height,UINT_32 mip0Depth,UINT_32 blockWidth,UINT_32 blockHeight,UINT_32 blockDepth,UINT_32 numMipLevel,ADDR2_MIP_INFO * pMipInfo) const4152 UINT_32 Gfx9Lib::GetMipChainInfo(
4153     AddrResourceType  resourceType,
4154     AddrSwizzleMode   swizzleMode,
4155     UINT_32           bpp,
4156     UINT_32           mip0Width,
4157     UINT_32           mip0Height,
4158     UINT_32           mip0Depth,
4159     UINT_32           blockWidth,
4160     UINT_32           blockHeight,
4161     UINT_32           blockDepth,
4162     UINT_32           numMipLevel,
4163     ADDR2_MIP_INFO*   pMipInfo) const
4164 {
4165     const Dim3d tailMaxDim =
4166         GetMipTailDim(resourceType, swizzleMode, blockWidth, blockHeight, blockDepth);
4167 
4168     UINT_32 mipPitch         = mip0Width;
4169     UINT_32 mipHeight        = mip0Height;
4170     UINT_32 mipDepth         = IsTex3d(resourceType) ? mip0Depth : 1;
4171     UINT_32 offset           = 0;
4172     UINT_32 firstMipIdInTail = numMipLevel;
4173     BOOL_32 inTail           = FALSE;
4174     BOOL_32 finalDim         = FALSE;
4175     BOOL_32 is3dThick        = IsThick(resourceType, swizzleMode);
4176     BOOL_32 is3dThin         = IsTex3d(resourceType) && (is3dThick == FALSE);
4177 
4178     for (UINT_32 mipId = 0; mipId < numMipLevel; mipId++)
4179     {
4180         if (inTail)
4181         {
4182             if (finalDim == FALSE)
4183             {
4184                 UINT_32 mipSize;
4185 
4186                 if (is3dThick)
4187                 {
4188                     mipSize = mipPitch * mipHeight * mipDepth * (bpp >> 3);
4189                 }
4190                 else
4191                 {
4192                     mipSize = mipPitch * mipHeight * (bpp >> 3);
4193                 }
4194 
4195                 if (mipSize <= 256)
4196                 {
4197                     UINT_32 index = Log2(bpp >> 3);
4198 
4199                     if (is3dThick)
4200                     {
4201                         mipPitch  = Block256_3dZ[index].w;
4202                         mipHeight = Block256_3dZ[index].h;
4203                         mipDepth  = Block256_3dZ[index].d;
4204                     }
4205                     else
4206                     {
4207                         mipPitch  = Block256_2d[index].w;
4208                         mipHeight = Block256_2d[index].h;
4209                     }
4210 
4211                     finalDim = TRUE;
4212                 }
4213             }
4214         }
4215         else
4216         {
4217             inTail = IsInMipTail(resourceType, swizzleMode, tailMaxDim,
4218                                  mipPitch, mipHeight, mipDepth);
4219 
4220             if (inTail)
4221             {
4222                 firstMipIdInTail = mipId;
4223                 mipPitch         = tailMaxDim.w;
4224                 mipHeight        = tailMaxDim.h;
4225 
4226                 if (is3dThick)
4227                 {
4228                     mipDepth = tailMaxDim.d;
4229                 }
4230             }
4231             else
4232             {
4233                 mipPitch  = PowTwoAlign(mipPitch,  blockWidth);
4234                 mipHeight = PowTwoAlign(mipHeight, blockHeight);
4235 
4236                 if (is3dThick)
4237                 {
4238                     mipDepth = PowTwoAlign(mipDepth,  blockDepth);
4239                 }
4240             }
4241         }
4242 
4243         if (pMipInfo != NULL)
4244         {
4245             pMipInfo[mipId].pitch  = mipPitch;
4246             pMipInfo[mipId].height = mipHeight;
4247             pMipInfo[mipId].depth  = mipDepth;
4248             pMipInfo[mipId].offset = offset;
4249         }
4250 
4251         offset += (mipPitch * mipHeight * mipDepth * (bpp >> 3));
4252 
4253         if (finalDim)
4254         {
4255             if (is3dThin)
4256             {
4257                 mipDepth = Max(mipDepth >> 1, 1u);
4258             }
4259         }
4260         else
4261         {
4262             mipPitch  = Max(mipPitch >> 1, 1u);
4263             mipHeight = Max(mipHeight >> 1, 1u);
4264 
4265             if (is3dThick || is3dThin)
4266             {
4267                 mipDepth = Max(mipDepth >> 1, 1u);
4268             }
4269         }
4270     }
4271 
4272     return firstMipIdInTail;
4273 }
4274 
4275 /**
4276 ************************************************************************************************************************
4277 *   Gfx9Lib::GetMetaMiptailInfo
4278 *
4279 *   @brief
4280 *       Get mip tail coordinate information.
4281 *
4282 *   @return
4283 *       N/A
4284 ************************************************************************************************************************
4285 */
GetMetaMiptailInfo(ADDR2_META_MIP_INFO * pInfo,Dim3d mipCoord,UINT_32 numMipInTail,Dim3d * pMetaBlkDim) const4286 VOID Gfx9Lib::GetMetaMiptailInfo(
4287     ADDR2_META_MIP_INFO*    pInfo,          ///< [out] output structure to store per mip coord
4288     Dim3d                   mipCoord,       ///< [in] mip tail base coord
4289     UINT_32                 numMipInTail,   ///< [in] number of mips in tail
4290     Dim3d*                  pMetaBlkDim     ///< [in] meta block width/height/depth
4291     ) const
4292 {
4293     BOOL_32 isThick   = (pMetaBlkDim->d > 1);
4294     UINT_32 mipWidth  = pMetaBlkDim->w;
4295     UINT_32 mipHeight = pMetaBlkDim->h >> 1;
4296     UINT_32 mipDepth  = pMetaBlkDim->d;
4297     UINT_32 minInc;
4298 
4299     if (isThick)
4300     {
4301         minInc = (pMetaBlkDim->h >= 512) ? 128 : ((pMetaBlkDim->h == 256) ? 64 : 32);
4302     }
4303     else if (pMetaBlkDim->h >= 1024)
4304     {
4305         minInc = 256;
4306     }
4307     else if (pMetaBlkDim->h == 512)
4308     {
4309         minInc = 128;
4310     }
4311     else
4312     {
4313         minInc = 64;
4314     }
4315 
4316     UINT_32 blk32MipId = 0xFFFFFFFF;
4317 
4318     for (UINT_32 mip = 0; mip < numMipInTail; mip++)
4319     {
4320         pInfo[mip].inMiptail = TRUE;
4321         pInfo[mip].startX = mipCoord.w;
4322         pInfo[mip].startY = mipCoord.h;
4323         pInfo[mip].startZ = mipCoord.d;
4324         pInfo[mip].width = mipWidth;
4325         pInfo[mip].height = mipHeight;
4326         pInfo[mip].depth = mipDepth;
4327 
4328         if (mipWidth <= 32)
4329         {
4330             if (blk32MipId == 0xFFFFFFFF)
4331             {
4332                 blk32MipId = mip;
4333             }
4334 
4335             mipCoord.w = pInfo[blk32MipId].startX;
4336             mipCoord.h = pInfo[blk32MipId].startY;
4337             mipCoord.d = pInfo[blk32MipId].startZ;
4338 
4339             switch (mip - blk32MipId)
4340             {
4341                 case 0:
4342                     mipCoord.w += 32;       // 16x16
4343                     break;
4344                 case 1:
4345                     mipCoord.h += 32;       // 8x8
4346                     break;
4347                 case 2:
4348                     mipCoord.h += 32;       // 4x4
4349                     mipCoord.w += 16;
4350                     break;
4351                 case 3:
4352                     mipCoord.h += 32;       // 2x2
4353                     mipCoord.w += 32;
4354                     break;
4355                 case 4:
4356                     mipCoord.h += 32;       // 1x1
4357                     mipCoord.w += 48;
4358                     break;
4359                 // The following are for BC/ASTC formats
4360                 case 5:
4361                     mipCoord.h += 48;       // 1/2 x 1/2
4362                     break;
4363                 case 6:
4364                     mipCoord.h += 48;       // 1/4 x 1/4
4365                     mipCoord.w += 16;
4366                     break;
4367                 case 7:
4368                     mipCoord.h += 48;       // 1/8 x 1/8
4369                     mipCoord.w += 32;
4370                     break;
4371                 case 8:
4372                     mipCoord.h += 48;       // 1/16 x 1/16
4373                     mipCoord.w += 48;
4374                     break;
4375                 default:
4376                     ADDR_ASSERT_ALWAYS();
4377                     break;
4378             }
4379 
4380             mipWidth = ((mip - blk32MipId) == 0) ? 16 : 8;
4381             mipHeight = mipWidth;
4382 
4383             if (isThick)
4384             {
4385                 mipDepth = mipWidth;
4386             }
4387         }
4388         else
4389         {
4390             if (mipWidth <= minInc)
4391             {
4392                 // if we're below the minimal increment...
4393                 if (isThick)
4394                 {
4395                     // For 3d, just go in z direction
4396                     mipCoord.d += mipDepth;
4397                 }
4398                 else
4399                 {
4400                     // For 2d, first go across, then down
4401                     if ((mipWidth * 2) == minInc)
4402                     {
4403                         // if we're 2 mips below, that's when we go back in x, and down in y
4404                         mipCoord.w -= minInc;
4405                         mipCoord.h += minInc;
4406                     }
4407                     else
4408                     {
4409                         // otherwise, just go across in x
4410                         mipCoord.w += minInc;
4411                     }
4412                 }
4413             }
4414             else
4415             {
4416                 // On even mip, go down, otherwise, go across
4417                 if (mip & 1)
4418                 {
4419                     mipCoord.w += mipWidth;
4420                 }
4421                 else
4422                 {
4423                     mipCoord.h += mipHeight;
4424                 }
4425             }
4426             // Divide the width by 2
4427             mipWidth >>= 1;
4428             // After the first mip in tail, the mip is always a square
4429             mipHeight = mipWidth;
4430             // ...or for 3d, a cube
4431             if (isThick)
4432             {
4433                 mipDepth = mipWidth;
4434             }
4435         }
4436     }
4437 }
4438 
4439 /**
4440 ************************************************************************************************************************
4441 *   Gfx9Lib::GetMipStartPos
4442 *
4443 *   @brief
4444 *       Internal function to get out information about mip logical start position
4445 *
4446 *   @return
4447 *       logical start position in macro block width/heith/depth of one mip level within one slice
4448 ************************************************************************************************************************
4449 */
GetMipStartPos(AddrResourceType resourceType,AddrSwizzleMode swizzleMode,UINT_32 width,UINT_32 height,UINT_32 depth,UINT_32 blockWidth,UINT_32 blockHeight,UINT_32 blockDepth,UINT_32 mipId,UINT_32 log2ElementBytes,UINT_32 * pMipTailBytesOffset) const4450 Dim3d Gfx9Lib::GetMipStartPos(
4451     AddrResourceType  resourceType,
4452     AddrSwizzleMode   swizzleMode,
4453     UINT_32           width,
4454     UINT_32           height,
4455     UINT_32           depth,
4456     UINT_32           blockWidth,
4457     UINT_32           blockHeight,
4458     UINT_32           blockDepth,
4459     UINT_32           mipId,
4460     UINT_32           log2ElementBytes,
4461     UINT_32*          pMipTailBytesOffset) const
4462 {
4463     Dim3d       mipStartPos = {0};
4464     const Dim3d tailMaxDim  = GetMipTailDim(resourceType, swizzleMode, blockWidth, blockHeight, blockDepth);
4465 
4466     // Report mip in tail if Mip0 is already in mip tail
4467     BOOL_32 inMipTail      = IsInMipTail(resourceType, swizzleMode, tailMaxDim, width, height, depth);
4468     UINT_32 log2blkSize    = GetBlockSizeLog2(swizzleMode);
4469     UINT_32 mipIndexInTail = mipId;
4470 
4471     if (inMipTail == FALSE)
4472     {
4473         // Mip 0 dimension, unit in block
4474         UINT_32 mipWidthInBlk   = width  / blockWidth;
4475         UINT_32 mipHeightInBlk  = height / blockHeight;
4476         UINT_32 mipDepthInBlk   = depth  / blockDepth;
4477         AddrMajorMode majorMode = GetMajorMode(resourceType,
4478                                                swizzleMode,
4479                                                mipWidthInBlk,
4480                                                mipHeightInBlk,
4481                                                mipDepthInBlk);
4482 
4483         UINT_32 endingMip = mipId + 1;
4484 
4485         for (UINT_32 i = 1; i <= mipId; i++)
4486         {
4487             if ((i == 1) || (i == 3))
4488             {
4489                 if (majorMode == ADDR_MAJOR_Y)
4490                 {
4491                     mipStartPos.w += mipWidthInBlk;
4492                 }
4493                 else
4494                 {
4495                     mipStartPos.h += mipHeightInBlk;
4496                 }
4497             }
4498             else
4499             {
4500                 if (majorMode == ADDR_MAJOR_X)
4501                 {
4502                    mipStartPos.w += mipWidthInBlk;
4503                 }
4504                 else if (majorMode == ADDR_MAJOR_Y)
4505                 {
4506                    mipStartPos.h += mipHeightInBlk;
4507                 }
4508                 else
4509                 {
4510                    mipStartPos.d += mipDepthInBlk;
4511                 }
4512             }
4513 
4514             BOOL_32 inTail = FALSE;
4515 
4516             if (IsThick(resourceType, swizzleMode))
4517             {
4518                 UINT_32 dim = log2blkSize % 3;
4519 
4520                 if (dim == 0)
4521                 {
4522                     inTail =
4523                         (mipWidthInBlk <= 2) && (mipHeightInBlk == 1) && (mipDepthInBlk <= 2);
4524                 }
4525                 else if (dim == 1)
4526                 {
4527                     inTail =
4528                         (mipWidthInBlk == 1) && (mipHeightInBlk <= 2) && (mipDepthInBlk <= 2);
4529                 }
4530                 else
4531                 {
4532                     inTail =
4533                         (mipWidthInBlk <= 2) && (mipHeightInBlk <= 2) && (mipDepthInBlk == 1);
4534                 }
4535             }
4536             else
4537             {
4538                 if (log2blkSize & 1)
4539                 {
4540                     inTail = (mipWidthInBlk <= 2) && (mipHeightInBlk == 1);
4541                 }
4542                 else
4543                 {
4544                     inTail = (mipWidthInBlk == 1) && (mipHeightInBlk <= 2);
4545                 }
4546             }
4547 
4548             if (inTail)
4549             {
4550                 endingMip = i;
4551                 break;
4552             }
4553 
4554             mipWidthInBlk  = RoundHalf(mipWidthInBlk);
4555             mipHeightInBlk = RoundHalf(mipHeightInBlk);
4556             mipDepthInBlk  = RoundHalf(mipDepthInBlk);
4557         }
4558 
4559         if (mipId >= endingMip)
4560         {
4561             inMipTail      = TRUE;
4562             mipIndexInTail = mipId - endingMip;
4563         }
4564     }
4565 
4566     if (inMipTail)
4567     {
4568         UINT_32 index = mipIndexInTail + MaxMacroBits - log2blkSize;
4569         ADDR_ASSERT(index < sizeof(MipTailOffset256B) / sizeof(UINT_32));
4570         *pMipTailBytesOffset = MipTailOffset256B[index] << 8;
4571     }
4572 
4573     return mipStartPos;
4574 }
4575 
4576 /**
4577 ************************************************************************************************************************
4578 *   Gfx9Lib::HwlComputeSurfaceAddrFromCoordTiled
4579 *
4580 *   @brief
4581 *       Internal function to calculate address from coord for tiled swizzle surface
4582 *
4583 *   @return
4584 *       ADDR_E_RETURNCODE
4585 ************************************************************************************************************************
4586 */
HwlComputeSurfaceAddrFromCoordTiled(const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT * pOut) const4587 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceAddrFromCoordTiled(
4588      const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,    ///< [in] input structure
4589      ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT*      pOut    ///< [out] output structure
4590      ) const
4591 {
4592     ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {0};
4593     localIn.swizzleMode  = pIn->swizzleMode;
4594     localIn.flags        = pIn->flags;
4595     localIn.resourceType = pIn->resourceType;
4596     localIn.bpp          = pIn->bpp;
4597     localIn.width        = Max(pIn->unalignedWidth, 1u);
4598     localIn.height       = Max(pIn->unalignedHeight, 1u);
4599     localIn.numSlices    = Max(pIn->numSlices, 1u);
4600     localIn.numMipLevels = Max(pIn->numMipLevels, 1u);
4601     localIn.numSamples   = Max(pIn->numSamples, 1u);
4602     localIn.numFrags     = Max(pIn->numFrags, 1u);
4603     if (localIn.numMipLevels <= 1)
4604     {
4605         localIn.pitchInElement = pIn->pitchInElement;
4606     }
4607 
4608     ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {0};
4609     ADDR_E_RETURNCODE returnCode = ComputeSurfaceInfoTiled(&localIn, &localOut);
4610 
4611     BOOL_32 valid = (returnCode == ADDR_OK) &&
4612                     (IsThin(pIn->resourceType, pIn->swizzleMode) ||
4613                      IsThick(pIn->resourceType, pIn->swizzleMode)) &&
4614                     ((pIn->pipeBankXor == 0) || (IsXor(pIn->swizzleMode)));
4615 
4616     if (valid)
4617     {
4618         UINT_32 log2ElementBytes   = Log2(pIn->bpp >> 3);
4619         Dim3d   mipStartPos        = {0};
4620         UINT_32 mipTailBytesOffset = 0;
4621 
4622         if (pIn->numMipLevels > 1)
4623         {
4624             // Mip-map chain cannot be MSAA surface
4625             ADDR_ASSERT((pIn->numSamples <= 1) && (pIn->numFrags<= 1));
4626 
4627             mipStartPos = GetMipStartPos(pIn->resourceType,
4628                                          pIn->swizzleMode,
4629                                          localOut.pitch,
4630                                          localOut.height,
4631                                          localOut.numSlices,
4632                                          localOut.blockWidth,
4633                                          localOut.blockHeight,
4634                                          localOut.blockSlices,
4635                                          pIn->mipId,
4636                                          log2ElementBytes,
4637                                          &mipTailBytesOffset);
4638         }
4639 
4640         UINT_32 interleaveOffset = 0;
4641         UINT_32 pipeBits = 0;
4642         UINT_32 pipeXor = 0;
4643         UINT_32 bankBits = 0;
4644         UINT_32 bankXor = 0;
4645 
4646         if (IsThin(pIn->resourceType, pIn->swizzleMode))
4647         {
4648             UINT_32 blockOffset = 0;
4649             UINT_32 log2blkSize = GetBlockSizeLog2(pIn->swizzleMode);
4650 
4651             if (IsZOrderSwizzle(pIn->swizzleMode))
4652             {
4653                 // Morton generation
4654                 if ((log2ElementBytes == 0) || (log2ElementBytes == 2))
4655                 {
4656                     UINT_32 totalLowBits = 6 - log2ElementBytes;
4657                     UINT_32 mortBits = totalLowBits / 2;
4658                     UINT_32 lowBitsValue = MortonGen2d(pIn->y, pIn->x, mortBits);
4659                     // Are 9 bits enough?
4660                     UINT_32 highBitsValue =
4661                         MortonGen2d(pIn->x >> mortBits, pIn->y >> mortBits, 9) << totalLowBits;
4662                     blockOffset = lowBitsValue | highBitsValue;
4663                     ADDR_ASSERT(blockOffset == lowBitsValue + highBitsValue);
4664                 }
4665                 else
4666                 {
4667                     blockOffset = MortonGen2d(pIn->y, pIn->x, 13);
4668                 }
4669 
4670                 // Fill LSBs with sample bits
4671                 if (pIn->numSamples > 1)
4672                 {
4673                     blockOffset *= pIn->numSamples;
4674                     blockOffset |= pIn->sample;
4675                 }
4676 
4677                 // Shift according to BytesPP
4678                 blockOffset <<= log2ElementBytes;
4679             }
4680             else
4681             {
4682                 // Micro block offset
4683                 UINT_32 microBlockOffset = ComputeSurface2DMicroBlockOffset(pIn);
4684                 blockOffset = microBlockOffset;
4685 
4686                 // Micro block dimension
4687                 ADDR_ASSERT(log2ElementBytes < MaxNumOfBpp);
4688                 Dim2d microBlockDim = Block256_2d[log2ElementBytes];
4689                 // Morton generation, does 12 bit enough?
4690                 blockOffset |=
4691                     MortonGen2d((pIn->x / microBlockDim.w), (pIn->y / microBlockDim.h), 12) << 8;
4692 
4693                 // Sample bits start location
4694                 UINT_32 sampleStart = log2blkSize - Log2(pIn->numSamples);
4695                 // Join sample bits information to the highest Macro block bits
4696                 if (IsNonPrtXor(pIn->swizzleMode))
4697                 {
4698                     // Non-prt-Xor : xor highest Macro block bits with sample bits
4699                     blockOffset = blockOffset ^ (pIn->sample << sampleStart);
4700                 }
4701                 else
4702                 {
4703                     // Non-Xor or prt-Xor: replace highest Macro block bits with sample bits
4704                     // after this op, the blockOffset only contains log2 Macro block size bits
4705                     blockOffset %= (1 << sampleStart);
4706                     blockOffset |= (pIn->sample << sampleStart);
4707                     ADDR_ASSERT((blockOffset >> log2blkSize) == 0);
4708                 }
4709             }
4710 
4711             if (IsXor(pIn->swizzleMode))
4712             {
4713                 // Mask off bits above Macro block bits to keep page synonyms working for prt
4714                 if (IsPrt(pIn->swizzleMode))
4715                 {
4716                     blockOffset &= ((1 << log2blkSize) - 1);
4717                 }
4718 
4719                 // Preserve offset inside pipe interleave
4720                 interleaveOffset = blockOffset & ((1 << m_pipeInterleaveLog2) - 1);
4721                 blockOffset >>= m_pipeInterleaveLog2;
4722 
4723                 // Pipe/Se xor bits
4724                 pipeBits = GetPipeXorBits(log2blkSize);
4725                 // Pipe xor
4726                 pipeXor = FoldXor2d(blockOffset, pipeBits);
4727                 blockOffset >>= pipeBits;
4728 
4729                 // Bank xor bits
4730                 bankBits = GetBankXorBits(log2blkSize);
4731                 // Bank Xor
4732                 bankXor = FoldXor2d(blockOffset, bankBits);
4733                 blockOffset >>= bankBits;
4734 
4735                 // Put all the part back together
4736                 blockOffset <<= bankBits;
4737                 blockOffset |= bankXor;
4738                 blockOffset <<= pipeBits;
4739                 blockOffset |= pipeXor;
4740                 blockOffset <<= m_pipeInterleaveLog2;
4741                 blockOffset |= interleaveOffset;
4742             }
4743 
4744             ADDR_ASSERT((blockOffset | mipTailBytesOffset) == (blockOffset + mipTailBytesOffset));
4745             ADDR_ASSERT((mipTailBytesOffset == 0u) || (blockOffset < (1u << log2blkSize)));
4746 
4747             blockOffset |= mipTailBytesOffset;
4748 
4749             if (IsNonPrtXor(pIn->swizzleMode) && (pIn->numSamples <= 1))
4750             {
4751                 // Apply slice xor if not MSAA/PRT
4752                 blockOffset ^= (ReverseBitVector(pIn->slice, pipeBits) << m_pipeInterleaveLog2);
4753                 blockOffset ^= (ReverseBitVector(pIn->slice >> pipeBits, bankBits) <<
4754                                 (m_pipeInterleaveLog2 + pipeBits));
4755             }
4756 
4757             returnCode = ApplyCustomerPipeBankXor(pIn->swizzleMode, pIn->pipeBankXor,
4758                                                   bankBits, pipeBits, &blockOffset);
4759 
4760             blockOffset %= (1 << log2blkSize);
4761 
4762             UINT_32 pitchInMacroBlock = localOut.mipChainPitch / localOut.blockWidth;
4763             UINT_32 paddedHeightInMacroBlock = localOut.mipChainHeight / localOut.blockHeight;
4764             UINT_32 sliceSizeInMacroBlock = pitchInMacroBlock * paddedHeightInMacroBlock;
4765             UINT_32 macroBlockIndex =
4766                 (pIn->slice + mipStartPos.d) * sliceSizeInMacroBlock +
4767                 ((pIn->y / localOut.blockHeight) + mipStartPos.h) * pitchInMacroBlock +
4768                 ((pIn->x / localOut.blockWidth) + mipStartPos.w);
4769 
4770             UINT_64 macroBlockOffset = (static_cast<UINT_64>(macroBlockIndex) <<
4771                                        GetBlockSizeLog2(pIn->swizzleMode));
4772 
4773             pOut->addr = blockOffset | macroBlockOffset;
4774         }
4775         else
4776         {
4777             UINT_32 log2blkSize = GetBlockSizeLog2(pIn->swizzleMode);
4778 
4779             Dim3d microBlockDim = Block1K_3d[log2ElementBytes];
4780 
4781             UINT_32 blockOffset = MortonGen3d((pIn->x / microBlockDim.w),
4782                                               (pIn->y / microBlockDim.h),
4783                                               (pIn->slice / microBlockDim.d),
4784                                               8);
4785 
4786             blockOffset <<= 10;
4787             blockOffset |= ComputeSurface3DMicroBlockOffset(pIn);
4788 
4789             if (IsXor(pIn->swizzleMode))
4790             {
4791                 // Mask off bits above Macro block bits to keep page synonyms working for prt
4792                 if (IsPrt(pIn->swizzleMode))
4793                 {
4794                     blockOffset &= ((1 << log2blkSize) - 1);
4795                 }
4796 
4797                 // Preserve offset inside pipe interleave
4798                 interleaveOffset = blockOffset & ((1 << m_pipeInterleaveLog2) - 1);
4799                 blockOffset >>= m_pipeInterleaveLog2;
4800 
4801                 // Pipe/Se xor bits
4802                 pipeBits = GetPipeXorBits(log2blkSize);
4803                 // Pipe xor
4804                 pipeXor = FoldXor3d(blockOffset, pipeBits);
4805                 blockOffset >>= pipeBits;
4806 
4807                 // Bank xor bits
4808                 bankBits = GetBankXorBits(log2blkSize);
4809                 // Bank Xor
4810                 bankXor = FoldXor3d(blockOffset, bankBits);
4811                 blockOffset >>= bankBits;
4812 
4813                 // Put all the part back together
4814                 blockOffset <<= bankBits;
4815                 blockOffset |= bankXor;
4816                 blockOffset <<= pipeBits;
4817                 blockOffset |= pipeXor;
4818                 blockOffset <<= m_pipeInterleaveLog2;
4819                 blockOffset |= interleaveOffset;
4820             }
4821 
4822             ADDR_ASSERT((blockOffset | mipTailBytesOffset) == (blockOffset + mipTailBytesOffset));
4823             ADDR_ASSERT((mipTailBytesOffset == 0u) || (blockOffset < (1u << log2blkSize)));
4824             blockOffset |= mipTailBytesOffset;
4825 
4826             returnCode = ApplyCustomerPipeBankXor(pIn->swizzleMode, pIn->pipeBankXor,
4827                                                   bankBits, pipeBits, &blockOffset);
4828 
4829             blockOffset %= (1 << log2blkSize);
4830 
4831             UINT_32 xb = pIn->x / localOut.blockWidth  + mipStartPos.w;
4832             UINT_32 yb = pIn->y / localOut.blockHeight + mipStartPos.h;
4833             UINT_32 zb = pIn->slice / localOut.blockSlices + + mipStartPos.d;
4834 
4835             UINT_32 pitchInBlock = localOut.mipChainPitch / localOut.blockWidth;
4836             UINT_32 sliceSizeInBlock =
4837                 (localOut.mipChainHeight / localOut.blockHeight) * pitchInBlock;
4838             UINT_32 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
4839 
4840             pOut->addr = blockOffset | (blockIndex << log2blkSize);
4841         }
4842     }
4843     else
4844     {
4845         returnCode = ADDR_INVALIDPARAMS;
4846     }
4847 
4848     return returnCode;
4849 }
4850 
4851 /**
4852 ************************************************************************************************************************
4853 *   Gfx9Lib::ComputeSurfaceInfoLinear
4854 *
4855 *   @brief
4856 *       Internal function to calculate padding for linear swizzle 2D/3D surface
4857 *
4858 *   @return
4859 *       N/A
4860 ************************************************************************************************************************
4861 */
ComputeSurfaceLinearPadding(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,UINT_32 * pMipmap0PaddedWidth,UINT_32 * pSlice0PaddedHeight,ADDR2_MIP_INFO * pMipInfo) const4862 ADDR_E_RETURNCODE Gfx9Lib::ComputeSurfaceLinearPadding(
4863     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,                    ///< [in] input srtucture
4864     UINT_32*                                pMipmap0PaddedWidth,    ///< [out] padded width in element
4865     UINT_32*                                pSlice0PaddedHeight,    ///< [out] padded height for HW
4866     ADDR2_MIP_INFO*                         pMipInfo                ///< [out] per mip information
4867     ) const
4868 {
4869     ADDR_E_RETURNCODE returnCode = ADDR_OK;
4870 
4871     UINT_32 elementBytes        = pIn->bpp >> 3;
4872     UINT_32 pitchAlignInElement = 0;
4873 
4874     if (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL)
4875     {
4876         ADDR_ASSERT(pIn->numMipLevels <= 1);
4877         ADDR_ASSERT(pIn->numSlices <= 1);
4878         pitchAlignInElement = 1;
4879     }
4880     else
4881     {
4882         pitchAlignInElement = (256 / elementBytes);
4883     }
4884 
4885     UINT_32 mipChainWidth      = PowTwoAlign(pIn->width, pitchAlignInElement);
4886     UINT_32 slice0PaddedHeight = pIn->height;
4887 
4888     returnCode = ApplyCustomizedPitchHeight(pIn, elementBytes, pitchAlignInElement,
4889                                             &mipChainWidth, &slice0PaddedHeight);
4890 
4891     if (returnCode == ADDR_OK)
4892     {
4893         UINT_32 mipChainHeight = 0;
4894         UINT_32 mipHeight      = pIn->height;
4895 
4896         for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
4897         {
4898             if (pMipInfo != NULL)
4899             {
4900                 pMipInfo[i].offset = mipChainWidth * mipChainHeight * elementBytes;
4901                 pMipInfo[i].pitch  = mipChainWidth;
4902                 pMipInfo[i].height = mipHeight;
4903                 pMipInfo[i].depth  = 1;
4904             }
4905 
4906             mipChainHeight += mipHeight;
4907             mipHeight = RoundHalf(mipHeight);
4908             mipHeight = Max(mipHeight, 1u);
4909         }
4910 
4911         *pMipmap0PaddedWidth = mipChainWidth;
4912         *pSlice0PaddedHeight = (pIn->numMipLevels > 1) ? mipChainHeight : slice0PaddedHeight;
4913     }
4914 
4915     return returnCode;
4916 }
4917 
4918 } // V2
4919 } // Addr
4920