1 /*
2  * Copyright 2015 Intel Corporation
3  *
4  *  Permission is hereby granted, free of charge, to any person obtaining a
5  *  copy of this software and associated documentation files (the "Software"),
6  *  to deal in the Software without restriction, including without limitation
7  *  the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  *  and/or sell copies of the Software, and to permit persons to whom the
9  *  Software is furnished to do so, subject to the following conditions:
10  *
11  *  The above copyright notice and this permission notice (including the next
12  *  paragraph) shall be included in all copies or substantial portions of the
13  *  Software.
14  *
15  *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  *  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  *  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  *  THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  *  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  *  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  *  IN THE SOFTWARE.
22  */
23 
24 #include <assert.h>
25 #include <stdarg.h>
26 #include <stdio.h>
27 
28 #include "genxml/genX_bits.h"
29 
30 #include "isl.h"
31 #include "isl_gen4.h"
32 #include "isl_gen6.h"
33 #include "isl_gen7.h"
34 #include "isl_gen8.h"
35 #include "isl_gen9.h"
36 #include "isl_gen12.h"
37 #include "isl_priv.h"
38 
39 void
isl_memcpy_linear_to_tiled(uint32_t xt1,uint32_t xt2,uint32_t yt1,uint32_t yt2,char * dst,const char * src,uint32_t dst_pitch,int32_t src_pitch,bool has_swizzling,enum isl_tiling tiling,isl_memcpy_type copy_type)40 isl_memcpy_linear_to_tiled(uint32_t xt1, uint32_t xt2,
41                            uint32_t yt1, uint32_t yt2,
42                            char *dst, const char *src,
43                            uint32_t dst_pitch, int32_t src_pitch,
44                            bool has_swizzling,
45                            enum isl_tiling tiling,
46                            isl_memcpy_type copy_type)
47 {
48 #ifdef USE_SSE41
49    if (copy_type == ISL_MEMCPY_STREAMING_LOAD) {
50       _isl_memcpy_linear_to_tiled_sse41(
51          xt1, xt2, yt1, yt2, dst, src, dst_pitch, src_pitch, has_swizzling,
52          tiling, copy_type);
53       return;
54    }
55 #endif
56 
57    _isl_memcpy_linear_to_tiled(
58       xt1, xt2, yt1, yt2, dst, src, dst_pitch, src_pitch, has_swizzling,
59       tiling, copy_type);
60 }
61 
62 void
isl_memcpy_tiled_to_linear(uint32_t xt1,uint32_t xt2,uint32_t yt1,uint32_t yt2,char * dst,const char * src,int32_t dst_pitch,uint32_t src_pitch,bool has_swizzling,enum isl_tiling tiling,isl_memcpy_type copy_type)63 isl_memcpy_tiled_to_linear(uint32_t xt1, uint32_t xt2,
64                            uint32_t yt1, uint32_t yt2,
65                            char *dst, const char *src,
66                            int32_t dst_pitch, uint32_t src_pitch,
67                            bool has_swizzling,
68                            enum isl_tiling tiling,
69                            isl_memcpy_type copy_type)
70 {
71 #ifdef USE_SSE41
72    if (copy_type == ISL_MEMCPY_STREAMING_LOAD) {
73       _isl_memcpy_tiled_to_linear_sse41(
74          xt1, xt2, yt1, yt2, dst, src, dst_pitch, src_pitch, has_swizzling,
75          tiling, copy_type);
76       return;
77    }
78 #endif
79 
80    _isl_memcpy_tiled_to_linear(
81       xt1, xt2, yt1, yt2, dst, src, dst_pitch, src_pitch, has_swizzling,
82       tiling, copy_type);
83 }
84 
85 void PRINTFLIKE(3, 4) UNUSED
__isl_finishme(const char * file,int line,const char * fmt,...)86 __isl_finishme(const char *file, int line, const char *fmt, ...)
87 {
88    va_list ap;
89    char buf[512];
90 
91    va_start(ap, fmt);
92    vsnprintf(buf, sizeof(buf), fmt, ap);
93    va_end(ap);
94 
95    fprintf(stderr, "%s:%d: FINISHME: %s\n", file, line, buf);
96 }
97 
98 static void
isl_device_setup_mocs(struct isl_device * dev)99 isl_device_setup_mocs(struct isl_device *dev)
100 {
101    if (dev->info->gen >= 12) {
102       if (dev->info->is_dg1) {
103          /* L3CC=WB */
104          dev->mocs.internal = 5 << 1;
105          /* Displayables on DG1 are free to cache in L3 since L3 is transient
106           * and flushed at bottom of each submission.
107           */
108          dev->mocs.external = 5 << 1;
109       } else {
110          /* TODO: Set PTE to MOCS 61 when the kernel is ready */
111          /* TC=1/LLC Only, LeCC=1/Uncacheable, LRUM=0, L3CC=1/Uncacheable */
112          dev->mocs.external = 3 << 1;
113          /* TC=LLC/eLLC, LeCC=WB, LRUM=3, L3CC=WB */
114          dev->mocs.internal = 2 << 1;
115 
116          /* L1 - HDC:L1 + L3 + LLC */
117          dev->mocs.l1_hdc_l3_llc = 48 << 1;
118       }
119    } else if (dev->info->gen >= 9) {
120       /* TC=LLC/eLLC, LeCC=PTE, LRUM=3, L3CC=WB */
121       dev->mocs.external = 1 << 1;
122       /* TC=LLC/eLLC, LeCC=WB, LRUM=3, L3CC=WB */
123       dev->mocs.internal = 2 << 1;
124    } else if (dev->info->gen >= 8) {
125       /* MEMORY_OBJECT_CONTROL_STATE:
126        * .MemoryTypeLLCeLLCCacheabilityControl = UCwithFenceifcoherentcycle,
127        * .TargetCache = L3DefertoPATforLLCeLLCselection,
128        * .AgeforQUADLRU = 0
129        */
130       dev->mocs.external = 0x18;
131       /* MEMORY_OBJECT_CONTROL_STATE:
132        * .MemoryTypeLLCeLLCCacheabilityControl = WB,
133        * .TargetCache = L3DefertoPATforLLCeLLCselection,
134        * .AgeforQUADLRU = 0
135        */
136       dev->mocs.internal = 0x78;
137    } else if (dev->info->gen >= 7) {
138       if (dev->info->is_haswell) {
139          /* MEMORY_OBJECT_CONTROL_STATE:
140           * .LLCeLLCCacheabilityControlLLCCC             = 0,
141           * .L3CacheabilityControlL3CC                   = 1,
142           */
143          dev->mocs.internal = 1;
144          dev->mocs.external = 1;
145       } else {
146          /* MEMORY_OBJECT_CONTROL_STATE:
147           * .GraphicsDataTypeGFDT                        = 0,
148           * .LLCCacheabilityControlLLCCC                 = 0,
149           * .L3CacheabilityControlL3CC                   = 1,
150           */
151          dev->mocs.internal = 1;
152          dev->mocs.external = 1;
153       }
154    } else {
155       dev->mocs.internal = 0;
156       dev->mocs.external = 0;
157    }
158 }
159 
160 /**
161  * Return an appropriate MOCS entry for the given usage flags.
162  */
163 uint32_t
isl_mocs(const struct isl_device * dev,isl_surf_usage_flags_t usage)164 isl_mocs(const struct isl_device *dev, isl_surf_usage_flags_t usage)
165 {
166    if (dev->info->gen >= 12 && !dev->info->is_dg1) {
167       if (usage & ISL_SURF_USAGE_STAGING_BIT)
168          return dev->mocs.internal;
169 
170       /* Using L1:HDC for storage buffers breaks Vulkan memory model
171        * tests that use shader atomics.  This isn't likely to work out,
172        * and we can't know a priori whether they'll be used.  So just
173        * continue with ordinary internal MOCS for now.
174        */
175       if (usage & ISL_SURF_USAGE_STORAGE_BIT)
176          return dev->mocs.internal;
177 
178       if (usage & (ISL_SURF_USAGE_CONSTANT_BUFFER_BIT |
179                    ISL_SURF_USAGE_RENDER_TARGET_BIT |
180                    ISL_SURF_USAGE_TEXTURE_BIT))
181          return dev->mocs.l1_hdc_l3_llc;
182    }
183 
184    return dev->mocs.internal;
185 }
186 
187 void
isl_device_init(struct isl_device * dev,const struct gen_device_info * info,bool has_bit6_swizzling)188 isl_device_init(struct isl_device *dev,
189                 const struct gen_device_info *info,
190                 bool has_bit6_swizzling)
191 {
192    /* Gen8+ don't have bit6 swizzling, ensure callsite is not confused. */
193    assert(!(has_bit6_swizzling && info->gen >= 8));
194 
195    dev->info = info;
196    dev->use_separate_stencil = ISL_DEV_GEN(dev) >= 6;
197    dev->has_bit6_swizzling = has_bit6_swizzling;
198 
199    /* The ISL_DEV macros may be defined in the CFLAGS, thus hardcoding some
200     * device properties at buildtime. Verify that the macros with the device
201     * properties chosen during runtime.
202     */
203    ISL_DEV_GEN_SANITIZE(dev);
204    ISL_DEV_USE_SEPARATE_STENCIL_SANITIZE(dev);
205 
206    /* Did we break hiz or stencil? */
207    if (ISL_DEV_USE_SEPARATE_STENCIL(dev))
208       assert(info->has_hiz_and_separate_stencil);
209    if (info->must_use_separate_stencil)
210       assert(ISL_DEV_USE_SEPARATE_STENCIL(dev));
211 
212    dev->ss.size = RENDER_SURFACE_STATE_length(info) * 4;
213    dev->ss.align = isl_align(dev->ss.size, 32);
214 
215    dev->ss.clear_color_state_size =
216       isl_align(CLEAR_COLOR_length(info) * 4, 64);
217    dev->ss.clear_color_state_offset =
218       RENDER_SURFACE_STATE_ClearValueAddress_start(info) / 32 * 4;
219 
220    dev->ss.clear_value_size =
221       isl_align(RENDER_SURFACE_STATE_RedClearColor_bits(info) +
222                 RENDER_SURFACE_STATE_GreenClearColor_bits(info) +
223                 RENDER_SURFACE_STATE_BlueClearColor_bits(info) +
224                 RENDER_SURFACE_STATE_AlphaClearColor_bits(info), 32) / 8;
225 
226    dev->ss.clear_value_offset =
227       RENDER_SURFACE_STATE_RedClearColor_start(info) / 32 * 4;
228 
229    assert(RENDER_SURFACE_STATE_SurfaceBaseAddress_start(info) % 8 == 0);
230    dev->ss.addr_offset =
231       RENDER_SURFACE_STATE_SurfaceBaseAddress_start(info) / 8;
232 
233    /* The "Auxiliary Surface Base Address" field starts a bit higher up
234     * because the bottom 12 bits are used for other things.  Round down to
235     * the nearest dword before.
236     */
237    dev->ss.aux_addr_offset =
238       (RENDER_SURFACE_STATE_AuxiliarySurfaceBaseAddress_start(info) & ~31) / 8;
239 
240    dev->ds.size = _3DSTATE_DEPTH_BUFFER_length(info) * 4;
241    assert(_3DSTATE_DEPTH_BUFFER_SurfaceBaseAddress_start(info) % 8 == 0);
242    dev->ds.depth_offset =
243       _3DSTATE_DEPTH_BUFFER_SurfaceBaseAddress_start(info) / 8;
244 
245    if (dev->use_separate_stencil) {
246       dev->ds.size += _3DSTATE_STENCIL_BUFFER_length(info) * 4 +
247                       _3DSTATE_HIER_DEPTH_BUFFER_length(info) * 4 +
248                       _3DSTATE_CLEAR_PARAMS_length(info) * 4;
249 
250       assert(_3DSTATE_STENCIL_BUFFER_SurfaceBaseAddress_start(info) % 8 == 0);
251       dev->ds.stencil_offset =
252          _3DSTATE_DEPTH_BUFFER_length(info) * 4 +
253          _3DSTATE_STENCIL_BUFFER_SurfaceBaseAddress_start(info) / 8;
254 
255       assert(_3DSTATE_HIER_DEPTH_BUFFER_SurfaceBaseAddress_start(info) % 8 == 0);
256       dev->ds.hiz_offset =
257          _3DSTATE_DEPTH_BUFFER_length(info) * 4 +
258          _3DSTATE_STENCIL_BUFFER_length(info) * 4 +
259          _3DSTATE_HIER_DEPTH_BUFFER_SurfaceBaseAddress_start(info) / 8;
260    } else {
261       dev->ds.stencil_offset = 0;
262       dev->ds.hiz_offset = 0;
263    }
264 
265    if (ISL_DEV_GEN(dev) >= 12) {
266       dev->ds.size += GEN12_MI_LOAD_REGISTER_IMM_length * 4 * 2;
267    }
268 
269    isl_device_setup_mocs(dev);
270 }
271 
272 /**
273  * @brief Query the set of multisamples supported by the device.
274  *
275  * This function always returns non-zero, as ISL_SAMPLE_COUNT_1_BIT is always
276  * supported.
277  */
278 isl_sample_count_mask_t ATTRIBUTE_CONST
isl_device_get_sample_counts(struct isl_device * dev)279 isl_device_get_sample_counts(struct isl_device *dev)
280 {
281    if (ISL_DEV_GEN(dev) >= 9) {
282       return ISL_SAMPLE_COUNT_1_BIT |
283              ISL_SAMPLE_COUNT_2_BIT |
284              ISL_SAMPLE_COUNT_4_BIT |
285              ISL_SAMPLE_COUNT_8_BIT |
286              ISL_SAMPLE_COUNT_16_BIT;
287    } else if (ISL_DEV_GEN(dev) >= 8) {
288       return ISL_SAMPLE_COUNT_1_BIT |
289              ISL_SAMPLE_COUNT_2_BIT |
290              ISL_SAMPLE_COUNT_4_BIT |
291              ISL_SAMPLE_COUNT_8_BIT;
292    } else if (ISL_DEV_GEN(dev) >= 7) {
293       return ISL_SAMPLE_COUNT_1_BIT |
294              ISL_SAMPLE_COUNT_4_BIT |
295              ISL_SAMPLE_COUNT_8_BIT;
296    } else if (ISL_DEV_GEN(dev) >= 6) {
297       return ISL_SAMPLE_COUNT_1_BIT |
298              ISL_SAMPLE_COUNT_4_BIT;
299    } else {
300       return ISL_SAMPLE_COUNT_1_BIT;
301    }
302 }
303 
304 /**
305  * @param[out] info is written only on success
306  */
307 static void
isl_tiling_get_info(enum isl_tiling tiling,uint32_t format_bpb,struct isl_tile_info * tile_info)308 isl_tiling_get_info(enum isl_tiling tiling,
309                     uint32_t format_bpb,
310                     struct isl_tile_info *tile_info)
311 {
312    const uint32_t bs = format_bpb / 8;
313    struct isl_extent2d logical_el, phys_B;
314 
315    if (tiling != ISL_TILING_LINEAR && !isl_is_pow2(format_bpb)) {
316       /* It is possible to have non-power-of-two formats in a tiled buffer.
317        * The easiest way to handle this is to treat the tile as if it is three
318        * times as wide.  This way no pixel will ever cross a tile boundary.
319        * This really only works on legacy X and Y tiling formats.
320        */
321       assert(tiling == ISL_TILING_X || tiling == ISL_TILING_Y0);
322       assert(bs % 3 == 0 && isl_is_pow2(format_bpb / 3));
323       isl_tiling_get_info(tiling, format_bpb / 3, tile_info);
324       return;
325    }
326 
327    switch (tiling) {
328    case ISL_TILING_LINEAR:
329       assert(bs > 0);
330       logical_el = isl_extent2d(1, 1);
331       phys_B = isl_extent2d(bs, 1);
332       break;
333 
334    case ISL_TILING_X:
335       assert(bs > 0);
336       logical_el = isl_extent2d(512 / bs, 8);
337       phys_B = isl_extent2d(512, 8);
338       break;
339 
340    case ISL_TILING_Y0:
341       assert(bs > 0);
342       logical_el = isl_extent2d(128 / bs, 32);
343       phys_B = isl_extent2d(128, 32);
344       break;
345 
346    case ISL_TILING_W:
347       assert(bs == 1);
348       logical_el = isl_extent2d(64, 64);
349       /* From the Broadwell PRM Vol 2d, RENDER_SURFACE_STATE::SurfacePitch:
350        *
351        *    "If the surface is a stencil buffer (and thus has Tile Mode set
352        *    to TILEMODE_WMAJOR), the pitch must be set to 2x the value
353        *    computed based on width, as the stencil buffer is stored with two
354        *    rows interleaved."
355        *
356        * This, together with the fact that stencil buffers are referred to as
357        * being Y-tiled in the PRMs for older hardware implies that the
358        * physical size of a W-tile is actually the same as for a Y-tile.
359        */
360       phys_B = isl_extent2d(128, 32);
361       break;
362 
363    case ISL_TILING_Yf:
364    case ISL_TILING_Ys: {
365       bool is_Ys = tiling == ISL_TILING_Ys;
366 
367       assert(bs > 0);
368       unsigned width = 1 << (6 + (ffs(bs) / 2) + (2 * is_Ys));
369       unsigned height = 1 << (6 - (ffs(bs) / 2) + (2 * is_Ys));
370 
371       logical_el = isl_extent2d(width / bs, height);
372       phys_B = isl_extent2d(width, height);
373       break;
374    }
375 
376    case ISL_TILING_HIZ:
377       /* HiZ buffers are required to have ISL_FORMAT_HIZ which is an 8x4
378        * 128bpb format.  The tiling has the same physical dimensions as
379        * Y-tiling but actually has two HiZ columns per Y-tiled column.
380        */
381       assert(bs == 16);
382       logical_el = isl_extent2d(16, 16);
383       phys_B = isl_extent2d(128, 32);
384       break;
385 
386    case ISL_TILING_CCS:
387       /* CCS surfaces are required to have one of the GENX_CCS_* formats which
388        * have a block size of 1 or 2 bits per block and each CCS element
389        * corresponds to one cache-line pair in the main surface.  From the Sky
390        * Lake PRM Vol. 12 in the section on planes:
391        *
392        *    "The Color Control Surface (CCS) contains the compression status
393        *    of the cache-line pairs. The compression state of the cache-line
394        *    pair is specified by 2 bits in the CCS.  Each CCS cache-line
395        *    represents an area on the main surface of 16x16 sets of 128 byte
396        *    Y-tiled cache-line-pairs. CCS is always Y tiled."
397        *
398        * The CCS being Y-tiled implies that it's an 8x8 grid of cache-lines.
399        * Since each cache line corresponds to a 16x16 set of cache-line pairs,
400        * that yields total tile area of 128x128 cache-line pairs or CCS
401        * elements.  On older hardware, each CCS element is 1 bit and the tile
402        * is 128x256 elements.
403        */
404       assert(format_bpb == 1 || format_bpb == 2);
405       logical_el = isl_extent2d(128, 256 / format_bpb);
406       phys_B = isl_extent2d(128, 32);
407       break;
408 
409    case ISL_TILING_GEN12_CCS:
410       /* From the Bspec, Gen Graphics > Gen12 > Memory Data Formats > Memory
411        * Compression > Memory Compression - Gen12:
412        *
413        *    4 bits of auxiliary plane data are required for 2 cachelines of
414        *    main surface data. This results in a single cacheline of auxiliary
415        *    plane data mapping to 4 4K pages of main surface data for the 4K
416        *    pages (tile Y ) and 1 64K Tile Ys page.
417        *
418        * The Y-tiled pairing bit of 9 shown in the table below that Bspec
419        * section expresses that the 2 cachelines of main surface data are
420        * horizontally adjacent.
421        *
422        * TODO: Handle Ys, Yf and their pairing bits.
423        *
424        * Therefore, each CCS cacheline represents a 512Bx32 row area and each
425        * element represents a 32Bx4 row area.
426        */
427       assert(format_bpb == 4);
428       logical_el = isl_extent2d(16, 8);
429       phys_B = isl_extent2d(64, 1);
430       break;
431 
432    default:
433       unreachable("not reached");
434    } /* end switch */
435 
436    *tile_info = (struct isl_tile_info) {
437       .tiling = tiling,
438       .format_bpb = format_bpb,
439       .logical_extent_el = logical_el,
440       .phys_extent_B = phys_B,
441    };
442 }
443 
444 bool
isl_color_value_is_zero(union isl_color_value value,enum isl_format format)445 isl_color_value_is_zero(union isl_color_value value,
446                         enum isl_format format)
447 {
448    const struct isl_format_layout *fmtl = isl_format_get_layout(format);
449 
450 #define RETURN_FALSE_IF_NOT_0(c, i) \
451    if (fmtl->channels.c.bits && value.u32[i] != 0) \
452       return false
453 
454    RETURN_FALSE_IF_NOT_0(r, 0);
455    RETURN_FALSE_IF_NOT_0(g, 1);
456    RETURN_FALSE_IF_NOT_0(b, 2);
457    RETURN_FALSE_IF_NOT_0(a, 3);
458 
459 #undef RETURN_FALSE_IF_NOT_0
460 
461    return true;
462 }
463 
464 bool
isl_color_value_is_zero_one(union isl_color_value value,enum isl_format format)465 isl_color_value_is_zero_one(union isl_color_value value,
466                             enum isl_format format)
467 {
468    const struct isl_format_layout *fmtl = isl_format_get_layout(format);
469 
470 #define RETURN_FALSE_IF_NOT_0_1(c, i, field) \
471    if (fmtl->channels.c.bits && value.field[i] != 0 && value.field[i] != 1) \
472       return false
473 
474    if (isl_format_has_int_channel(format)) {
475       RETURN_FALSE_IF_NOT_0_1(r, 0, u32);
476       RETURN_FALSE_IF_NOT_0_1(g, 1, u32);
477       RETURN_FALSE_IF_NOT_0_1(b, 2, u32);
478       RETURN_FALSE_IF_NOT_0_1(a, 3, u32);
479    } else {
480       RETURN_FALSE_IF_NOT_0_1(r, 0, f32);
481       RETURN_FALSE_IF_NOT_0_1(g, 1, f32);
482       RETURN_FALSE_IF_NOT_0_1(b, 2, f32);
483       RETURN_FALSE_IF_NOT_0_1(a, 3, f32);
484    }
485 
486 #undef RETURN_FALSE_IF_NOT_0_1
487 
488    return true;
489 }
490 
491 /**
492  * @param[out] tiling is set only on success
493  */
494 static bool
isl_surf_choose_tiling(const struct isl_device * dev,const struct isl_surf_init_info * restrict info,enum isl_tiling * tiling)495 isl_surf_choose_tiling(const struct isl_device *dev,
496                        const struct isl_surf_init_info *restrict info,
497                        enum isl_tiling *tiling)
498 {
499    isl_tiling_flags_t tiling_flags = info->tiling_flags;
500 
501    /* HiZ surfaces always use the HiZ tiling */
502    if (info->usage & ISL_SURF_USAGE_HIZ_BIT) {
503       assert(info->format == ISL_FORMAT_HIZ);
504       assert(tiling_flags == ISL_TILING_HIZ_BIT);
505       *tiling = isl_tiling_flag_to_enum(tiling_flags);
506       return true;
507    }
508 
509    /* CCS surfaces always use the CCS tiling */
510    if (info->usage & ISL_SURF_USAGE_CCS_BIT) {
511       assert(isl_format_get_layout(info->format)->txc == ISL_TXC_CCS);
512       UNUSED bool ivb_ccs = ISL_DEV_GEN(dev) < 12 &&
513                             tiling_flags == ISL_TILING_CCS_BIT;
514       UNUSED bool tgl_ccs = ISL_DEV_GEN(dev) >= 12 &&
515                             tiling_flags == ISL_TILING_GEN12_CCS_BIT;
516       assert(ivb_ccs != tgl_ccs);
517       *tiling = isl_tiling_flag_to_enum(tiling_flags);
518       return true;
519    }
520 
521    if (ISL_DEV_GEN(dev) >= 6) {
522       isl_gen6_filter_tiling(dev, info, &tiling_flags);
523    } else {
524       isl_gen4_filter_tiling(dev, info, &tiling_flags);
525    }
526 
527    #define CHOOSE(__tiling) \
528       do { \
529          if (tiling_flags & (1u << (__tiling))) { \
530             *tiling = (__tiling); \
531             return true; \
532           } \
533       } while (0)
534 
535    /* Of the tiling modes remaining, choose the one that offers the best
536     * performance.
537     */
538 
539    if (info->dim == ISL_SURF_DIM_1D) {
540       /* Prefer linear for 1D surfaces because they do not benefit from
541        * tiling. To the contrary, tiling leads to wasted memory and poor
542        * memory locality due to the swizzling and alignment restrictions
543        * required in tiled surfaces.
544        */
545       CHOOSE(ISL_TILING_LINEAR);
546    }
547 
548    CHOOSE(ISL_TILING_Ys);
549    CHOOSE(ISL_TILING_Yf);
550    CHOOSE(ISL_TILING_Y0);
551    CHOOSE(ISL_TILING_X);
552    CHOOSE(ISL_TILING_W);
553    CHOOSE(ISL_TILING_LINEAR);
554 
555    #undef CHOOSE
556 
557    /* No tiling mode accomodates the inputs. */
558    return false;
559 }
560 
561 static bool
isl_choose_msaa_layout(const struct isl_device * dev,const struct isl_surf_init_info * info,enum isl_tiling tiling,enum isl_msaa_layout * msaa_layout)562 isl_choose_msaa_layout(const struct isl_device *dev,
563                  const struct isl_surf_init_info *info,
564                  enum isl_tiling tiling,
565                  enum isl_msaa_layout *msaa_layout)
566 {
567    if (ISL_DEV_GEN(dev) >= 8) {
568       return isl_gen8_choose_msaa_layout(dev, info, tiling, msaa_layout);
569    } else if (ISL_DEV_GEN(dev) >= 7) {
570       return isl_gen7_choose_msaa_layout(dev, info, tiling, msaa_layout);
571    } else if (ISL_DEV_GEN(dev) >= 6) {
572       return isl_gen6_choose_msaa_layout(dev, info, tiling, msaa_layout);
573    } else {
574       return isl_gen4_choose_msaa_layout(dev, info, tiling, msaa_layout);
575    }
576 }
577 
578 struct isl_extent2d
isl_get_interleaved_msaa_px_size_sa(uint32_t samples)579 isl_get_interleaved_msaa_px_size_sa(uint32_t samples)
580 {
581    assert(isl_is_pow2(samples));
582 
583    /* From the Broadwell PRM >> Volume 5: Memory Views >> Computing Mip Level
584     * Sizes (p133):
585     *
586     *    If the surface is multisampled and it is a depth or stencil surface
587     *    or Multisampled Surface StorageFormat in SURFACE_STATE is
588     *    MSFMT_DEPTH_STENCIL, W_L and H_L must be adjusted as follows before
589     *    proceeding: [...]
590     */
591    return (struct isl_extent2d) {
592       .width = 1 << ((ffs(samples) - 0) / 2),
593       .height = 1 << ((ffs(samples) - 1) / 2),
594    };
595 }
596 
597 static void
isl_msaa_interleaved_scale_px_to_sa(uint32_t samples,uint32_t * width,uint32_t * height)598 isl_msaa_interleaved_scale_px_to_sa(uint32_t samples,
599                                     uint32_t *width, uint32_t *height)
600 {
601    const struct isl_extent2d px_size_sa =
602       isl_get_interleaved_msaa_px_size_sa(samples);
603 
604    if (width)
605       *width = isl_align(*width, 2) * px_size_sa.width;
606    if (height)
607       *height = isl_align(*height, 2) * px_size_sa.height;
608 }
609 
610 static enum isl_array_pitch_span
isl_choose_array_pitch_span(const struct isl_device * dev,const struct isl_surf_init_info * restrict info,enum isl_dim_layout dim_layout,const struct isl_extent4d * phys_level0_sa)611 isl_choose_array_pitch_span(const struct isl_device *dev,
612                             const struct isl_surf_init_info *restrict info,
613                             enum isl_dim_layout dim_layout,
614                             const struct isl_extent4d *phys_level0_sa)
615 {
616    switch (dim_layout) {
617    case ISL_DIM_LAYOUT_GEN9_1D:
618    case ISL_DIM_LAYOUT_GEN4_2D:
619       if (ISL_DEV_GEN(dev) >= 8) {
620          /* QPitch becomes programmable in Broadwell. So choose the
621           * most compact QPitch possible in order to conserve memory.
622           *
623           * From the Broadwell PRM >> Volume 2d: Command Reference: Structures
624           * >> RENDER_SURFACE_STATE Surface QPitch (p325):
625           *
626           *    - Software must ensure that this field is set to a value
627           *      sufficiently large such that the array slices in the surface
628           *      do not overlap. Refer to the Memory Data Formats section for
629           *      information on how surfaces are stored in memory.
630           *
631           *    - This field specifies the distance in rows between array
632           *      slices.  It is used only in the following cases:
633           *
634           *          - Surface Array is enabled OR
635           *          - Number of Mulitsamples is not NUMSAMPLES_1 and
636           *            Multisampled Surface Storage Format set to MSFMT_MSS OR
637           *          - Surface Type is SURFTYPE_CUBE
638           */
639          return ISL_ARRAY_PITCH_SPAN_COMPACT;
640       } else if (ISL_DEV_GEN(dev) >= 7) {
641          /* Note that Ivybridge introduces
642           * RENDER_SURFACE_STATE.SurfaceArraySpacing, which provides the
643           * driver more control over the QPitch.
644           */
645 
646          if (phys_level0_sa->array_len == 1) {
647             /* The hardware will never use the QPitch. So choose the most
648              * compact QPitch possible in order to conserve memory.
649              */
650             return ISL_ARRAY_PITCH_SPAN_COMPACT;
651          }
652 
653          if (isl_surf_usage_is_depth_or_stencil(info->usage) ||
654              (info->usage & ISL_SURF_USAGE_HIZ_BIT)) {
655             /* From the Ivybridge PRM >> Volume 1 Part 1: Graphics Core >>
656              * Section 6.18.4.7: Surface Arrays (p112):
657              *
658              *    If Surface Array Spacing is set to ARYSPC_FULL (note that
659              *    the depth buffer and stencil buffer have an implied value of
660              *    ARYSPC_FULL):
661              */
662             return ISL_ARRAY_PITCH_SPAN_FULL;
663          }
664 
665          if (info->levels == 1) {
666             /* We are able to set RENDER_SURFACE_STATE.SurfaceArraySpacing
667              * to ARYSPC_LOD0.
668              */
669             return ISL_ARRAY_PITCH_SPAN_COMPACT;
670          }
671 
672          return ISL_ARRAY_PITCH_SPAN_FULL;
673       } else if ((ISL_DEV_GEN(dev) == 5 || ISL_DEV_GEN(dev) == 6) &&
674                  ISL_DEV_USE_SEPARATE_STENCIL(dev) &&
675                  isl_surf_usage_is_stencil(info->usage)) {
676          /* [ILK-SNB] Errata from the Sandy Bridge PRM >> Volume 4 Part 1:
677           * Graphics Core >> Section 7.18.3.7: Surface Arrays:
678           *
679           *    The separate stencil buffer does not support mip mapping, thus
680           *    the storage for LODs other than LOD 0 is not needed.
681           */
682          assert(info->levels == 1);
683          return ISL_ARRAY_PITCH_SPAN_COMPACT;
684       } else {
685          if ((ISL_DEV_GEN(dev) == 5 || ISL_DEV_GEN(dev) == 6) &&
686              ISL_DEV_USE_SEPARATE_STENCIL(dev) &&
687              isl_surf_usage_is_stencil(info->usage)) {
688             /* [ILK-SNB] Errata from the Sandy Bridge PRM >> Volume 4 Part 1:
689              * Graphics Core >> Section 7.18.3.7: Surface Arrays:
690              *
691              *    The separate stencil buffer does not support mip mapping,
692              *    thus the storage for LODs other than LOD 0 is not needed.
693              */
694             assert(info->levels == 1);
695             assert(phys_level0_sa->array_len == 1);
696             return ISL_ARRAY_PITCH_SPAN_COMPACT;
697          }
698 
699          if (phys_level0_sa->array_len == 1) {
700             /* The hardware will never use the QPitch. So choose the most
701              * compact QPitch possible in order to conserve memory.
702              */
703             return ISL_ARRAY_PITCH_SPAN_COMPACT;
704          }
705 
706          return ISL_ARRAY_PITCH_SPAN_FULL;
707       }
708 
709    case ISL_DIM_LAYOUT_GEN4_3D:
710       /* The hardware will never use the QPitch. So choose the most
711        * compact QPitch possible in order to conserve memory.
712        */
713       return ISL_ARRAY_PITCH_SPAN_COMPACT;
714 
715    case ISL_DIM_LAYOUT_GEN6_STENCIL_HIZ:
716       /* Each array image in the gen6 stencil of HiZ surface is compact in the
717        * sense that every LOD is a compact array of the same size as LOD0.
718        */
719       return ISL_ARRAY_PITCH_SPAN_COMPACT;
720    }
721 
722    unreachable("bad isl_dim_layout");
723    return ISL_ARRAY_PITCH_SPAN_FULL;
724 }
725 
726 static void
isl_choose_image_alignment_el(const struct isl_device * dev,const struct isl_surf_init_info * restrict info,enum isl_tiling tiling,enum isl_dim_layout dim_layout,enum isl_msaa_layout msaa_layout,struct isl_extent3d * image_align_el)727 isl_choose_image_alignment_el(const struct isl_device *dev,
728                               const struct isl_surf_init_info *restrict info,
729                               enum isl_tiling tiling,
730                               enum isl_dim_layout dim_layout,
731                               enum isl_msaa_layout msaa_layout,
732                               struct isl_extent3d *image_align_el)
733 {
734    const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
735    if (fmtl->txc == ISL_TXC_MCS) {
736       assert(tiling == ISL_TILING_Y0);
737 
738       /*
739        * IvyBrigde PRM Vol 2, Part 1, "11.7 MCS Buffer for Render Target(s)":
740        *
741        * Height, width, and layout of MCS buffer in this case must match with
742        * Render Target height, width, and layout. MCS buffer is tiledY.
743        *
744        * To avoid wasting memory, choose the smallest alignment possible:
745        * HALIGN_4 and VALIGN_4.
746        */
747       *image_align_el = isl_extent3d(4, 4, 1);
748       return;
749    } else if (info->format == ISL_FORMAT_HIZ) {
750       assert(ISL_DEV_GEN(dev) >= 6);
751       if (ISL_DEV_GEN(dev) == 6) {
752          /* HiZ surfaces on Sandy Bridge are packed tightly. */
753          *image_align_el = isl_extent3d(1, 1, 1);
754       } else if (ISL_DEV_GEN(dev) < 12) {
755          /* On gen7+, HiZ surfaces are always aligned to 16x8 pixels in the
756           * primary surface which works out to 2x2 HiZ elments.
757           */
758          *image_align_el = isl_extent3d(2, 2, 1);
759       } else {
760          /* On gen12+, HiZ surfaces are always aligned to 16x16 pixels in the
761           * primary surface which works out to 2x4 HiZ elments.
762           * TODO: Verify
763           */
764          *image_align_el = isl_extent3d(2, 4, 1);
765       }
766       return;
767    }
768 
769    if (ISL_DEV_GEN(dev) >= 12) {
770       isl_gen12_choose_image_alignment_el(dev, info, tiling, dim_layout,
771                                           msaa_layout, image_align_el);
772    } else if (ISL_DEV_GEN(dev) >= 9) {
773       isl_gen9_choose_image_alignment_el(dev, info, tiling, dim_layout,
774                                          msaa_layout, image_align_el);
775    } else if (ISL_DEV_GEN(dev) >= 8) {
776       isl_gen8_choose_image_alignment_el(dev, info, tiling, dim_layout,
777                                          msaa_layout, image_align_el);
778    } else if (ISL_DEV_GEN(dev) >= 7) {
779       isl_gen7_choose_image_alignment_el(dev, info, tiling, dim_layout,
780                                           msaa_layout, image_align_el);
781    } else if (ISL_DEV_GEN(dev) >= 6) {
782       isl_gen6_choose_image_alignment_el(dev, info, tiling, dim_layout,
783                                          msaa_layout, image_align_el);
784    } else {
785       isl_gen4_choose_image_alignment_el(dev, info, tiling, dim_layout,
786                                          msaa_layout, image_align_el);
787    }
788 }
789 
790 static enum isl_dim_layout
isl_surf_choose_dim_layout(const struct isl_device * dev,enum isl_surf_dim logical_dim,enum isl_tiling tiling,isl_surf_usage_flags_t usage)791 isl_surf_choose_dim_layout(const struct isl_device *dev,
792                            enum isl_surf_dim logical_dim,
793                            enum isl_tiling tiling,
794                            isl_surf_usage_flags_t usage)
795 {
796    /* Sandy bridge needs a special layout for HiZ and stencil. */
797    if (ISL_DEV_GEN(dev) == 6 &&
798        (tiling == ISL_TILING_W || tiling == ISL_TILING_HIZ))
799       return ISL_DIM_LAYOUT_GEN6_STENCIL_HIZ;
800 
801    if (ISL_DEV_GEN(dev) >= 9) {
802       switch (logical_dim) {
803       case ISL_SURF_DIM_1D:
804          /* From the Sky Lake PRM Vol. 5, "1D Surfaces":
805           *
806           *    One-dimensional surfaces use a tiling mode of linear.
807           *    Technically, they are not tiled resources, but the Tiled
808           *    Resource Mode field in RENDER_SURFACE_STATE is still used to
809           *    indicate the alignment requirements for this linear surface
810           *    (See 1D Alignment requirements for how 4K and 64KB Tiled
811           *    Resource Modes impact alignment). Alternatively, a 1D surface
812           *    can be defined as a 2D tiled surface (e.g. TileY or TileX) with
813           *    a height of 0.
814           *
815           * In other words, ISL_DIM_LAYOUT_GEN9_1D is only used for linear
816           * surfaces and, for tiled surfaces, ISL_DIM_LAYOUT_GEN4_2D is used.
817           */
818          if (tiling == ISL_TILING_LINEAR)
819             return ISL_DIM_LAYOUT_GEN9_1D;
820          else
821             return ISL_DIM_LAYOUT_GEN4_2D;
822       case ISL_SURF_DIM_2D:
823       case ISL_SURF_DIM_3D:
824          return ISL_DIM_LAYOUT_GEN4_2D;
825       }
826    } else {
827       switch (logical_dim) {
828       case ISL_SURF_DIM_1D:
829       case ISL_SURF_DIM_2D:
830          /* From the G45 PRM Vol. 1a, "6.17.4.1 Hardware Cube Map Layout":
831           *
832           * The cube face textures are stored in the same way as 3D surfaces
833           * are stored (see section 6.17.5 for details).  For cube surfaces,
834           * however, the depth is equal to the number of faces (always 6) and
835           * is not reduced for each MIP.
836           */
837          if (ISL_DEV_GEN(dev) == 4 && (usage & ISL_SURF_USAGE_CUBE_BIT))
838             return ISL_DIM_LAYOUT_GEN4_3D;
839 
840          return ISL_DIM_LAYOUT_GEN4_2D;
841       case ISL_SURF_DIM_3D:
842          return ISL_DIM_LAYOUT_GEN4_3D;
843       }
844    }
845 
846    unreachable("bad isl_surf_dim");
847    return ISL_DIM_LAYOUT_GEN4_2D;
848 }
849 
850 /**
851  * Calculate the physical extent of the surface's first level, in units of
852  * surface samples.
853  */
854 static void
isl_calc_phys_level0_extent_sa(const struct isl_device * dev,const struct isl_surf_init_info * restrict info,enum isl_dim_layout dim_layout,enum isl_tiling tiling,enum isl_msaa_layout msaa_layout,struct isl_extent4d * phys_level0_sa)855 isl_calc_phys_level0_extent_sa(const struct isl_device *dev,
856                                const struct isl_surf_init_info *restrict info,
857                                enum isl_dim_layout dim_layout,
858                                enum isl_tiling tiling,
859                                enum isl_msaa_layout msaa_layout,
860                                struct isl_extent4d *phys_level0_sa)
861 {
862    const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
863 
864    if (isl_format_is_planar(info->format))
865       unreachable("Planar formats unsupported");
866 
867    switch (info->dim) {
868    case ISL_SURF_DIM_1D:
869       assert(info->height == 1);
870       assert(info->depth == 1);
871       assert(info->samples == 1);
872 
873       switch (dim_layout) {
874       case ISL_DIM_LAYOUT_GEN4_3D:
875          unreachable("bad isl_dim_layout");
876 
877       case ISL_DIM_LAYOUT_GEN9_1D:
878       case ISL_DIM_LAYOUT_GEN4_2D:
879       case ISL_DIM_LAYOUT_GEN6_STENCIL_HIZ:
880          *phys_level0_sa = (struct isl_extent4d) {
881             .w = info->width,
882             .h = 1,
883             .d = 1,
884             .a = info->array_len,
885          };
886          break;
887       }
888       break;
889 
890    case ISL_SURF_DIM_2D:
891       if (ISL_DEV_GEN(dev) == 4 && (info->usage & ISL_SURF_USAGE_CUBE_BIT))
892          assert(dim_layout == ISL_DIM_LAYOUT_GEN4_3D);
893       else
894          assert(dim_layout == ISL_DIM_LAYOUT_GEN4_2D ||
895                 dim_layout == ISL_DIM_LAYOUT_GEN6_STENCIL_HIZ);
896 
897       if (tiling == ISL_TILING_Ys && info->samples > 1)
898          isl_finishme("%s:%s: multisample TileYs layout", __FILE__, __func__);
899 
900       switch (msaa_layout) {
901       case ISL_MSAA_LAYOUT_NONE:
902          assert(info->depth == 1);
903          assert(info->samples == 1);
904 
905          *phys_level0_sa = (struct isl_extent4d) {
906             .w = info->width,
907             .h = info->height,
908             .d = 1,
909             .a = info->array_len,
910          };
911          break;
912 
913       case ISL_MSAA_LAYOUT_ARRAY:
914          assert(info->depth == 1);
915          assert(info->levels == 1);
916          assert(isl_format_supports_multisampling(dev->info, info->format));
917          assert(fmtl->bw == 1 && fmtl->bh == 1);
918 
919          *phys_level0_sa = (struct isl_extent4d) {
920             .w = info->width,
921             .h = info->height,
922             .d = 1,
923             .a = info->array_len * info->samples,
924          };
925          break;
926 
927       case ISL_MSAA_LAYOUT_INTERLEAVED:
928          assert(info->depth == 1);
929          assert(info->levels == 1);
930          assert(isl_format_supports_multisampling(dev->info, info->format));
931 
932          *phys_level0_sa = (struct isl_extent4d) {
933             .w = info->width,
934             .h = info->height,
935             .d = 1,
936             .a = info->array_len,
937          };
938 
939          isl_msaa_interleaved_scale_px_to_sa(info->samples,
940                                              &phys_level0_sa->w,
941                                              &phys_level0_sa->h);
942          break;
943       }
944       break;
945 
946    case ISL_SURF_DIM_3D:
947       assert(info->array_len == 1);
948       assert(info->samples == 1);
949 
950       if (fmtl->bd > 1) {
951          isl_finishme("%s:%s: compression block with depth > 1",
952                       __FILE__, __func__);
953       }
954 
955       switch (dim_layout) {
956       case ISL_DIM_LAYOUT_GEN9_1D:
957       case ISL_DIM_LAYOUT_GEN6_STENCIL_HIZ:
958          unreachable("bad isl_dim_layout");
959 
960       case ISL_DIM_LAYOUT_GEN4_2D:
961          assert(ISL_DEV_GEN(dev) >= 9);
962 
963          *phys_level0_sa = (struct isl_extent4d) {
964             .w = info->width,
965             .h = info->height,
966             .d = 1,
967             .a = info->depth,
968          };
969          break;
970 
971       case ISL_DIM_LAYOUT_GEN4_3D:
972          assert(ISL_DEV_GEN(dev) < 9);
973          *phys_level0_sa = (struct isl_extent4d) {
974             .w = info->width,
975             .h = info->height,
976             .d = info->depth,
977             .a = 1,
978          };
979          break;
980       }
981       break;
982    }
983 }
984 
985 /**
986  * Calculate the pitch between physical array slices, in units of rows of
987  * surface elements.
988  */
989 static uint32_t
isl_calc_array_pitch_el_rows_gen4_2d(const struct isl_device * dev,const struct isl_surf_init_info * restrict info,const struct isl_tile_info * tile_info,const struct isl_extent3d * image_align_sa,const struct isl_extent4d * phys_level0_sa,enum isl_array_pitch_span array_pitch_span,const struct isl_extent2d * phys_slice0_sa)990 isl_calc_array_pitch_el_rows_gen4_2d(
991       const struct isl_device *dev,
992       const struct isl_surf_init_info *restrict info,
993       const struct isl_tile_info *tile_info,
994       const struct isl_extent3d *image_align_sa,
995       const struct isl_extent4d *phys_level0_sa,
996       enum isl_array_pitch_span array_pitch_span,
997       const struct isl_extent2d *phys_slice0_sa)
998 {
999    const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
1000    uint32_t pitch_sa_rows = 0;
1001 
1002    switch (array_pitch_span) {
1003    case ISL_ARRAY_PITCH_SPAN_COMPACT:
1004       pitch_sa_rows = isl_align_npot(phys_slice0_sa->h, image_align_sa->h);
1005       break;
1006    case ISL_ARRAY_PITCH_SPAN_FULL: {
1007       /* The QPitch equation is found in the Broadwell PRM >> Volume 5:
1008        * Memory Views >> Common Surface Formats >> Surface Layout >> 2D
1009        * Surfaces >> Surface Arrays.
1010        */
1011       uint32_t H0_sa = phys_level0_sa->h;
1012       uint32_t H1_sa = isl_minify(H0_sa, 1);
1013 
1014       uint32_t h0_sa = isl_align_npot(H0_sa, image_align_sa->h);
1015       uint32_t h1_sa = isl_align_npot(H1_sa, image_align_sa->h);
1016 
1017       uint32_t m;
1018       if (ISL_DEV_GEN(dev) >= 7) {
1019          /* The QPitch equation changed slightly in Ivybridge. */
1020          m = 12;
1021       } else {
1022          m = 11;
1023       }
1024 
1025       pitch_sa_rows = h0_sa + h1_sa + (m * image_align_sa->h);
1026 
1027       if (ISL_DEV_GEN(dev) == 6 && info->samples > 1 &&
1028           (info->height % 4 == 1)) {
1029          /* [SNB] Errata from the Sandy Bridge PRM >> Volume 4 Part 1:
1030           * Graphics Core >> Section 7.18.3.7: Surface Arrays:
1031           *
1032           *    [SNB] Errata: Sampler MSAA Qpitch will be 4 greater than
1033           *    the value calculated in the equation above , for every
1034           *    other odd Surface Height starting from 1 i.e. 1,5,9,13.
1035           *
1036           * XXX(chadv): Is the errata natural corollary of the physical
1037           * layout of interleaved samples?
1038           */
1039          pitch_sa_rows += 4;
1040       }
1041 
1042       pitch_sa_rows = isl_align_npot(pitch_sa_rows, fmtl->bh);
1043       } /* end case */
1044       break;
1045    }
1046 
1047    assert(pitch_sa_rows % fmtl->bh == 0);
1048    uint32_t pitch_el_rows = pitch_sa_rows / fmtl->bh;
1049 
1050    if (ISL_DEV_GEN(dev) >= 9 && ISL_DEV_GEN(dev) <= 11 &&
1051        fmtl->txc == ISL_TXC_CCS) {
1052       /*
1053        * From the Sky Lake PRM Vol 7, "MCS Buffer for Render Target(s)" (p. 632):
1054        *
1055        *    "Mip-mapped and arrayed surfaces are supported with MCS buffer
1056        *    layout with these alignments in the RT space: Horizontal
1057        *    Alignment = 128 and Vertical Alignment = 64."
1058        *
1059        * From the Sky Lake PRM Vol. 2d, "RENDER_SURFACE_STATE" (p. 435):
1060        *
1061        *    "For non-multisampled render target's CCS auxiliary surface,
1062        *    QPitch must be computed with Horizontal Alignment = 128 and
1063        *    Surface Vertical Alignment = 256. These alignments are only for
1064        *    CCS buffer and not for associated render target."
1065        *
1066        * The first restriction is already handled by isl_choose_image_alignment_el
1067        * but the second restriction, which is an extension of the first, only
1068        * applies to qpitch and must be applied here.
1069        *
1070        * The second restriction disappears on Gen12.
1071        */
1072       assert(fmtl->bh == 4);
1073       pitch_el_rows = isl_align(pitch_el_rows, 256 / 4);
1074    }
1075 
1076    if (ISL_DEV_GEN(dev) >= 9 &&
1077        info->dim == ISL_SURF_DIM_3D &&
1078        tile_info->tiling != ISL_TILING_LINEAR) {
1079       /* From the Skylake BSpec >> RENDER_SURFACE_STATE >> Surface QPitch:
1080        *
1081        *    Tile Mode != Linear: This field must be set to an integer multiple
1082        *    of the tile height
1083        */
1084       pitch_el_rows = isl_align(pitch_el_rows, tile_info->logical_extent_el.height);
1085    }
1086 
1087    return pitch_el_rows;
1088 }
1089 
1090 /**
1091  * A variant of isl_calc_phys_slice0_extent_sa() specific to
1092  * ISL_DIM_LAYOUT_GEN4_2D.
1093  */
1094 static void
isl_calc_phys_slice0_extent_sa_gen4_2d(const struct isl_device * dev,const struct isl_surf_init_info * restrict info,enum isl_msaa_layout msaa_layout,const struct isl_extent3d * image_align_sa,const struct isl_extent4d * phys_level0_sa,struct isl_extent2d * phys_slice0_sa)1095 isl_calc_phys_slice0_extent_sa_gen4_2d(
1096       const struct isl_device *dev,
1097       const struct isl_surf_init_info *restrict info,
1098       enum isl_msaa_layout msaa_layout,
1099       const struct isl_extent3d *image_align_sa,
1100       const struct isl_extent4d *phys_level0_sa,
1101       struct isl_extent2d *phys_slice0_sa)
1102 {
1103    assert(phys_level0_sa->depth == 1);
1104 
1105    if (info->levels == 1) {
1106       /* Do not pad the surface to the image alignment.
1107        *
1108        * For tiled surfaces, using a reduced alignment here avoids wasting CPU
1109        * cycles on the below mipmap layout caluclations. Reducing the
1110        * alignment here is safe because we later align the row pitch and array
1111        * pitch to the tile boundary. It is safe even for
1112        * ISL_MSAA_LAYOUT_INTERLEAVED, because phys_level0_sa is already scaled
1113        * to accomodate the interleaved samples.
1114        *
1115        * For linear surfaces, reducing the alignment here permits us to later
1116        * choose an arbitrary, non-aligned row pitch. If the surface backs
1117        * a VkBuffer, then an arbitrary pitch may be needed to accomodate
1118        * VkBufferImageCopy::bufferRowLength.
1119        */
1120       *phys_slice0_sa = (struct isl_extent2d) {
1121          .w = phys_level0_sa->w,
1122          .h = phys_level0_sa->h,
1123       };
1124       return;
1125    }
1126 
1127    uint32_t slice_top_w = 0;
1128    uint32_t slice_bottom_w = 0;
1129    uint32_t slice_left_h = 0;
1130    uint32_t slice_right_h = 0;
1131 
1132    uint32_t W0 = phys_level0_sa->w;
1133    uint32_t H0 = phys_level0_sa->h;
1134 
1135    for (uint32_t l = 0; l < info->levels; ++l) {
1136       uint32_t W = isl_minify(W0, l);
1137       uint32_t H = isl_minify(H0, l);
1138 
1139       uint32_t w = isl_align_npot(W, image_align_sa->w);
1140       uint32_t h = isl_align_npot(H, image_align_sa->h);
1141 
1142       if (l == 0) {
1143          slice_top_w = w;
1144          slice_left_h = h;
1145          slice_right_h = h;
1146       } else if (l == 1) {
1147          slice_bottom_w = w;
1148          slice_left_h += h;
1149       } else if (l == 2) {
1150          slice_bottom_w += w;
1151          slice_right_h += h;
1152       } else {
1153          slice_right_h += h;
1154       }
1155    }
1156 
1157    *phys_slice0_sa = (struct isl_extent2d) {
1158       .w = MAX(slice_top_w, slice_bottom_w),
1159       .h = MAX(slice_left_h, slice_right_h),
1160    };
1161 }
1162 
1163 static void
isl_calc_phys_total_extent_el_gen4_2d(const struct isl_device * dev,const struct isl_surf_init_info * restrict info,const struct isl_tile_info * tile_info,enum isl_msaa_layout msaa_layout,const struct isl_extent3d * image_align_sa,const struct isl_extent4d * phys_level0_sa,enum isl_array_pitch_span array_pitch_span,uint32_t * array_pitch_el_rows,struct isl_extent2d * total_extent_el)1164 isl_calc_phys_total_extent_el_gen4_2d(
1165       const struct isl_device *dev,
1166       const struct isl_surf_init_info *restrict info,
1167       const struct isl_tile_info *tile_info,
1168       enum isl_msaa_layout msaa_layout,
1169       const struct isl_extent3d *image_align_sa,
1170       const struct isl_extent4d *phys_level0_sa,
1171       enum isl_array_pitch_span array_pitch_span,
1172       uint32_t *array_pitch_el_rows,
1173       struct isl_extent2d *total_extent_el)
1174 {
1175    const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
1176 
1177    struct isl_extent2d phys_slice0_sa;
1178    isl_calc_phys_slice0_extent_sa_gen4_2d(dev, info, msaa_layout,
1179                                           image_align_sa, phys_level0_sa,
1180                                           &phys_slice0_sa);
1181    *array_pitch_el_rows =
1182       isl_calc_array_pitch_el_rows_gen4_2d(dev, info, tile_info,
1183                                            image_align_sa, phys_level0_sa,
1184                                            array_pitch_span,
1185                                            &phys_slice0_sa);
1186    *total_extent_el = (struct isl_extent2d) {
1187       .w = isl_align_div_npot(phys_slice0_sa.w, fmtl->bw),
1188       .h = *array_pitch_el_rows * (phys_level0_sa->array_len - 1) +
1189            isl_align_div_npot(phys_slice0_sa.h, fmtl->bh),
1190    };
1191 }
1192 
1193 /**
1194  * A variant of isl_calc_phys_slice0_extent_sa() specific to
1195  * ISL_DIM_LAYOUT_GEN4_3D.
1196  */
1197 static void
isl_calc_phys_total_extent_el_gen4_3d(const struct isl_device * dev,const struct isl_surf_init_info * restrict info,const struct isl_extent3d * image_align_sa,const struct isl_extent4d * phys_level0_sa,uint32_t * array_pitch_el_rows,struct isl_extent2d * phys_total_el)1198 isl_calc_phys_total_extent_el_gen4_3d(
1199       const struct isl_device *dev,
1200       const struct isl_surf_init_info *restrict info,
1201       const struct isl_extent3d *image_align_sa,
1202       const struct isl_extent4d *phys_level0_sa,
1203       uint32_t *array_pitch_el_rows,
1204       struct isl_extent2d *phys_total_el)
1205 {
1206    const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
1207 
1208    assert(info->samples == 1);
1209 
1210    if (info->dim != ISL_SURF_DIM_3D) {
1211       /* From the G45 PRM Vol. 1a, "6.17.4.1 Hardware Cube Map Layout":
1212        *
1213        * The cube face textures are stored in the same way as 3D surfaces
1214        * are stored (see section 6.17.5 for details).  For cube surfaces,
1215        * however, the depth is equal to the number of faces (always 6) and
1216        * is not reduced for each MIP.
1217        */
1218       assert(ISL_DEV_GEN(dev) == 4);
1219       assert(info->usage & ISL_SURF_USAGE_CUBE_BIT);
1220       assert(phys_level0_sa->array_len == 6);
1221    } else {
1222       assert(phys_level0_sa->array_len == 1);
1223    }
1224 
1225    uint32_t total_w = 0;
1226    uint32_t total_h = 0;
1227 
1228    uint32_t W0 = phys_level0_sa->w;
1229    uint32_t H0 = phys_level0_sa->h;
1230    uint32_t D0 = phys_level0_sa->d;
1231    uint32_t A0 = phys_level0_sa->a;
1232 
1233    for (uint32_t l = 0; l < info->levels; ++l) {
1234       uint32_t level_w = isl_align_npot(isl_minify(W0, l), image_align_sa->w);
1235       uint32_t level_h = isl_align_npot(isl_minify(H0, l), image_align_sa->h);
1236       uint32_t level_d = info->dim == ISL_SURF_DIM_3D ? isl_minify(D0, l) : A0;
1237 
1238       uint32_t max_layers_horiz = MIN(level_d, 1u << l);
1239       uint32_t max_layers_vert = isl_align(level_d, 1u << l) / (1u << l);
1240 
1241       total_w = MAX(total_w, level_w * max_layers_horiz);
1242       total_h += level_h * max_layers_vert;
1243    }
1244 
1245    /* GEN4_3D layouts don't really have an array pitch since each LOD has a
1246     * different number of horizontal and vertical layers.  We have to set it
1247     * to something, so at least make it true for LOD0.
1248     */
1249    *array_pitch_el_rows =
1250       isl_align_npot(phys_level0_sa->h, image_align_sa->h) / fmtl->bw;
1251    *phys_total_el = (struct isl_extent2d) {
1252       .w = isl_assert_div(total_w, fmtl->bw),
1253       .h = isl_assert_div(total_h, fmtl->bh),
1254    };
1255 }
1256 
1257 /**
1258  * A variant of isl_calc_phys_slice0_extent_sa() specific to
1259  * ISL_DIM_LAYOUT_GEN6_STENCIL_HIZ.
1260  */
1261 static void
isl_calc_phys_total_extent_el_gen6_stencil_hiz(const struct isl_device * dev,const struct isl_surf_init_info * restrict info,const struct isl_tile_info * tile_info,const struct isl_extent3d * image_align_sa,const struct isl_extent4d * phys_level0_sa,uint32_t * array_pitch_el_rows,struct isl_extent2d * phys_total_el)1262 isl_calc_phys_total_extent_el_gen6_stencil_hiz(
1263       const struct isl_device *dev,
1264       const struct isl_surf_init_info *restrict info,
1265       const struct isl_tile_info *tile_info,
1266       const struct isl_extent3d *image_align_sa,
1267       const struct isl_extent4d *phys_level0_sa,
1268       uint32_t *array_pitch_el_rows,
1269       struct isl_extent2d *phys_total_el)
1270 {
1271    const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
1272 
1273    const struct isl_extent2d tile_extent_sa = {
1274       .w = tile_info->logical_extent_el.w * fmtl->bw,
1275       .h = tile_info->logical_extent_el.h * fmtl->bh,
1276    };
1277    /* Tile size is a multiple of image alignment */
1278    assert(tile_extent_sa.w % image_align_sa->w == 0);
1279    assert(tile_extent_sa.h % image_align_sa->h == 0);
1280 
1281    const uint32_t W0 = phys_level0_sa->w;
1282    const uint32_t H0 = phys_level0_sa->h;
1283 
1284    /* Each image has the same height as LOD0 because the hardware thinks
1285     * everything is LOD0
1286     */
1287    const uint32_t H = isl_align(H0, image_align_sa->h) * phys_level0_sa->a;
1288 
1289    uint32_t total_top_w = 0;
1290    uint32_t total_bottom_w = 0;
1291    uint32_t total_h = 0;
1292 
1293    for (uint32_t l = 0; l < info->levels; ++l) {
1294       const uint32_t W = isl_minify(W0, l);
1295 
1296       const uint32_t w = isl_align(W, tile_extent_sa.w);
1297       const uint32_t h = isl_align(H, tile_extent_sa.h);
1298 
1299       if (l == 0) {
1300          total_top_w = w;
1301          total_h = h;
1302       } else if (l == 1) {
1303          total_bottom_w = w;
1304          total_h += h;
1305       } else {
1306          total_bottom_w += w;
1307       }
1308    }
1309 
1310    *array_pitch_el_rows =
1311       isl_assert_div(isl_align(H0, image_align_sa->h), fmtl->bh);
1312    *phys_total_el = (struct isl_extent2d) {
1313       .w = isl_assert_div(MAX(total_top_w, total_bottom_w), fmtl->bw),
1314       .h = isl_assert_div(total_h, fmtl->bh),
1315    };
1316 }
1317 
1318 /**
1319  * A variant of isl_calc_phys_slice0_extent_sa() specific to
1320  * ISL_DIM_LAYOUT_GEN9_1D.
1321  */
1322 static void
isl_calc_phys_total_extent_el_gen9_1d(const struct isl_device * dev,const struct isl_surf_init_info * restrict info,const struct isl_extent3d * image_align_sa,const struct isl_extent4d * phys_level0_sa,uint32_t * array_pitch_el_rows,struct isl_extent2d * phys_total_el)1323 isl_calc_phys_total_extent_el_gen9_1d(
1324       const struct isl_device *dev,
1325       const struct isl_surf_init_info *restrict info,
1326       const struct isl_extent3d *image_align_sa,
1327       const struct isl_extent4d *phys_level0_sa,
1328       uint32_t *array_pitch_el_rows,
1329       struct isl_extent2d *phys_total_el)
1330 {
1331    const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
1332 
1333    assert(phys_level0_sa->height == 1);
1334    assert(phys_level0_sa->depth == 1);
1335    assert(info->samples == 1);
1336    assert(image_align_sa->w >= fmtl->bw);
1337 
1338    uint32_t slice_w = 0;
1339    const uint32_t W0 = phys_level0_sa->w;
1340 
1341    for (uint32_t l = 0; l < info->levels; ++l) {
1342       uint32_t W = isl_minify(W0, l);
1343       uint32_t w = isl_align_npot(W, image_align_sa->w);
1344 
1345       slice_w += w;
1346    }
1347 
1348    *array_pitch_el_rows = 1;
1349    *phys_total_el = (struct isl_extent2d) {
1350       .w = isl_assert_div(slice_w, fmtl->bw),
1351       .h = phys_level0_sa->array_len,
1352    };
1353 }
1354 
1355 /**
1356  * Calculate the two-dimensional total physical extent of the surface, in
1357  * units of surface elements.
1358  */
1359 static void
isl_calc_phys_total_extent_el(const struct isl_device * dev,const struct isl_surf_init_info * restrict info,const struct isl_tile_info * tile_info,enum isl_dim_layout dim_layout,enum isl_msaa_layout msaa_layout,const struct isl_extent3d * image_align_sa,const struct isl_extent4d * phys_level0_sa,enum isl_array_pitch_span array_pitch_span,uint32_t * array_pitch_el_rows,struct isl_extent2d * total_extent_el)1360 isl_calc_phys_total_extent_el(const struct isl_device *dev,
1361                               const struct isl_surf_init_info *restrict info,
1362                               const struct isl_tile_info *tile_info,
1363                               enum isl_dim_layout dim_layout,
1364                               enum isl_msaa_layout msaa_layout,
1365                               const struct isl_extent3d *image_align_sa,
1366                               const struct isl_extent4d *phys_level0_sa,
1367                               enum isl_array_pitch_span array_pitch_span,
1368                               uint32_t *array_pitch_el_rows,
1369                               struct isl_extent2d *total_extent_el)
1370 {
1371    switch (dim_layout) {
1372    case ISL_DIM_LAYOUT_GEN9_1D:
1373       assert(array_pitch_span == ISL_ARRAY_PITCH_SPAN_COMPACT);
1374       isl_calc_phys_total_extent_el_gen9_1d(dev, info,
1375                                             image_align_sa, phys_level0_sa,
1376                                             array_pitch_el_rows,
1377                                             total_extent_el);
1378       return;
1379    case ISL_DIM_LAYOUT_GEN4_2D:
1380       isl_calc_phys_total_extent_el_gen4_2d(dev, info, tile_info, msaa_layout,
1381                                             image_align_sa, phys_level0_sa,
1382                                             array_pitch_span,
1383                                             array_pitch_el_rows,
1384                                             total_extent_el);
1385       return;
1386    case ISL_DIM_LAYOUT_GEN6_STENCIL_HIZ:
1387       assert(array_pitch_span == ISL_ARRAY_PITCH_SPAN_COMPACT);
1388       isl_calc_phys_total_extent_el_gen6_stencil_hiz(dev, info, tile_info,
1389                                                      image_align_sa,
1390                                                      phys_level0_sa,
1391                                                      array_pitch_el_rows,
1392                                                      total_extent_el);
1393       return;
1394    case ISL_DIM_LAYOUT_GEN4_3D:
1395       assert(array_pitch_span == ISL_ARRAY_PITCH_SPAN_COMPACT);
1396       isl_calc_phys_total_extent_el_gen4_3d(dev, info,
1397                                             image_align_sa, phys_level0_sa,
1398                                             array_pitch_el_rows,
1399                                             total_extent_el);
1400       return;
1401    }
1402 
1403    unreachable("invalid value for dim_layout");
1404 }
1405 
1406 static uint32_t
isl_calc_row_pitch_alignment(const struct isl_device * dev,const struct isl_surf_init_info * surf_info,const struct isl_tile_info * tile_info)1407 isl_calc_row_pitch_alignment(const struct isl_device *dev,
1408                              const struct isl_surf_init_info *surf_info,
1409                              const struct isl_tile_info *tile_info)
1410 {
1411    if (tile_info->tiling != ISL_TILING_LINEAR) {
1412       /* According to BSpec: 44930, Gen12's CCS-compressed surface pitches must
1413        * be 512B-aligned. CCS is only support on Y tilings.
1414        *
1415        * Only consider 512B alignment when :
1416        *    - AUX is not explicitly disabled
1417        *    - the caller has specified no pitch
1418        *
1419        * isl_surf_get_ccs_surf() will check that the main surface alignment
1420        * matches CCS expectations.
1421        */
1422       if (ISL_DEV_GEN(dev) >= 12 &&
1423           isl_format_supports_ccs_e(dev->info, surf_info->format) &&
1424           tile_info->tiling != ISL_TILING_X &&
1425           !(surf_info->usage & ISL_SURF_USAGE_DISABLE_AUX_BIT) &&
1426           surf_info->row_pitch_B == 0) {
1427          return isl_align(tile_info->phys_extent_B.width, 512);
1428       }
1429 
1430       return tile_info->phys_extent_B.width;
1431    }
1432 
1433    /* From the Broadwel PRM >> Volume 2d: Command Reference: Structures >>
1434     * RENDER_SURFACE_STATE Surface Pitch (p349):
1435     *
1436     *    - For linear render target surfaces and surfaces accessed with the
1437     *      typed data port messages, the pitch must be a multiple of the
1438     *      element size for non-YUV surface formats.  Pitch must be
1439     *      a multiple of 2 * element size for YUV surface formats.
1440     *
1441     *    - [Requirements for SURFTYPE_BUFFER and SURFTYPE_STRBUF, which we
1442     *      ignore because isl doesn't do buffers.]
1443     *
1444     *    - For other linear surfaces, the pitch can be any multiple of
1445     *      bytes.
1446     */
1447    const struct isl_format_layout *fmtl = isl_format_get_layout(surf_info->format);
1448    const uint32_t bs = fmtl->bpb / 8;
1449    uint32_t alignment;
1450 
1451    if (surf_info->usage & ISL_SURF_USAGE_RENDER_TARGET_BIT) {
1452       if (isl_format_is_yuv(surf_info->format)) {
1453          alignment = 2 * bs;
1454       } else  {
1455          alignment = bs;
1456       }
1457    } else {
1458       alignment = 1;
1459    }
1460 
1461    /* From the Broadwell PRM >> Volume 2c: Command Reference: Registers >>
1462     * PRI_STRIDE Stride (p1254):
1463     *
1464     *    "When using linear memory, this must be at least 64 byte aligned."
1465     */
1466    if (surf_info->usage & ISL_SURF_USAGE_DISPLAY_BIT)
1467       alignment = isl_align(alignment, 64);
1468 
1469    return alignment;
1470 }
1471 
1472 static uint32_t
isl_calc_linear_min_row_pitch(const struct isl_device * dev,const struct isl_surf_init_info * info,const struct isl_extent2d * phys_total_el,uint32_t alignment_B)1473 isl_calc_linear_min_row_pitch(const struct isl_device *dev,
1474                               const struct isl_surf_init_info *info,
1475                               const struct isl_extent2d *phys_total_el,
1476                               uint32_t alignment_B)
1477 {
1478    const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
1479    const uint32_t bs = fmtl->bpb / 8;
1480 
1481    return isl_align_npot(bs * phys_total_el->w, alignment_B);
1482 }
1483 
1484 static uint32_t
isl_calc_tiled_min_row_pitch(const struct isl_device * dev,const struct isl_surf_init_info * surf_info,const struct isl_tile_info * tile_info,const struct isl_extent2d * phys_total_el,uint32_t alignment_B)1485 isl_calc_tiled_min_row_pitch(const struct isl_device *dev,
1486                              const struct isl_surf_init_info *surf_info,
1487                              const struct isl_tile_info *tile_info,
1488                              const struct isl_extent2d *phys_total_el,
1489                              uint32_t alignment_B)
1490 {
1491    const struct isl_format_layout *fmtl = isl_format_get_layout(surf_info->format);
1492 
1493    assert(fmtl->bpb % tile_info->format_bpb == 0);
1494 
1495    const uint32_t tile_el_scale = fmtl->bpb / tile_info->format_bpb;
1496    const uint32_t total_w_tl =
1497       isl_align_div(phys_total_el->w * tile_el_scale,
1498                     tile_info->logical_extent_el.width);
1499 
1500    /* In some cases the alignment of the pitch might be > to the tile size
1501     * (for example Gen12 CCS requires 512B alignment while the tile's width
1502     * can be 128B), so align the row pitch to the alignment.
1503     */
1504    assert(alignment_B >= tile_info->phys_extent_B.width);
1505    return isl_align(total_w_tl * tile_info->phys_extent_B.width, alignment_B);
1506 }
1507 
1508 static uint32_t
isl_calc_min_row_pitch(const struct isl_device * dev,const struct isl_surf_init_info * surf_info,const struct isl_tile_info * tile_info,const struct isl_extent2d * phys_total_el,uint32_t alignment_B)1509 isl_calc_min_row_pitch(const struct isl_device *dev,
1510                        const struct isl_surf_init_info *surf_info,
1511                        const struct isl_tile_info *tile_info,
1512                        const struct isl_extent2d *phys_total_el,
1513                        uint32_t alignment_B)
1514 {
1515    if (tile_info->tiling == ISL_TILING_LINEAR) {
1516       return isl_calc_linear_min_row_pitch(dev, surf_info, phys_total_el,
1517                                            alignment_B);
1518    } else {
1519       return isl_calc_tiled_min_row_pitch(dev, surf_info, tile_info,
1520                                           phys_total_el, alignment_B);
1521    }
1522 }
1523 
/**
 * Is the pitch `n` in the valid range for a hardware bitfield, if the
 * bitfield's size is `bits` bits?
 *
 * Hardware pitch fields are offset by 1. For example, if the size of
 * RENDER_SURFACE_STATE::SurfacePitch is B bits, then the range of valid
 * pitches is [1, 2^B] inclusive.  If the surface pitch is N, then
 * RENDER_SURFACE_STATE::SurfacePitch must be set to N-1.
 *
 * A `bits` of 0 never matches any pitch (presumably this is how a field that
 * does not exist on the device is reported -- confirm against the genxml
 * *_bits() helpers).  `n` must be non-zero.
 */
static bool
pitch_in_range(uint32_t n, uint32_t bits)
{
   assert(n != 0);

   /* Compute 2^bits in 64 bits.  A plain `1 << bits` is a signed int shift,
    * which is undefined once bits reaches 31.  Since n is only 32 bits wide,
    * any field of 32 or more bits can hold every pitch, so the shift amount
    * is capped at 32.
    */
   const uint64_t max_pitch = (uint64_t) 1 << (bits < 32 ? bits : 32);

   return likely(bits != 0 && 1 <= n && n <= max_pitch);
}
1539 
1540 static bool
isl_calc_row_pitch(const struct isl_device * dev,const struct isl_surf_init_info * surf_info,const struct isl_tile_info * tile_info,enum isl_dim_layout dim_layout,const struct isl_extent2d * phys_total_el,uint32_t * out_row_pitch_B)1541 isl_calc_row_pitch(const struct isl_device *dev,
1542                    const struct isl_surf_init_info *surf_info,
1543                    const struct isl_tile_info *tile_info,
1544                    enum isl_dim_layout dim_layout,
1545                    const struct isl_extent2d *phys_total_el,
1546                    uint32_t *out_row_pitch_B)
1547 {
1548    uint32_t alignment_B =
1549       isl_calc_row_pitch_alignment(dev, surf_info, tile_info);
1550 
1551    const uint32_t min_row_pitch_B =
1552       isl_calc_min_row_pitch(dev, surf_info, tile_info, phys_total_el,
1553                              alignment_B);
1554 
1555    if (surf_info->row_pitch_B != 0) {
1556       if (surf_info->row_pitch_B < min_row_pitch_B)
1557          return false;
1558 
1559       if (surf_info->row_pitch_B % alignment_B != 0)
1560          return false;
1561    }
1562 
1563    const uint32_t row_pitch_B =
1564       surf_info->row_pitch_B != 0 ? surf_info->row_pitch_B : min_row_pitch_B;
1565 
1566    const uint32_t row_pitch_tl = row_pitch_B / tile_info->phys_extent_B.width;
1567 
1568    if (row_pitch_B == 0)
1569       return false;
1570 
1571    if (dim_layout == ISL_DIM_LAYOUT_GEN9_1D) {
1572       /* SurfacePitch is ignored for this layout. */
1573       goto done;
1574    }
1575 
1576    if ((surf_info->usage & (ISL_SURF_USAGE_RENDER_TARGET_BIT |
1577                             ISL_SURF_USAGE_TEXTURE_BIT |
1578                             ISL_SURF_USAGE_STORAGE_BIT)) &&
1579        !pitch_in_range(row_pitch_B, RENDER_SURFACE_STATE_SurfacePitch_bits(dev->info)))
1580       return false;
1581 
1582    if ((surf_info->usage & (ISL_SURF_USAGE_CCS_BIT |
1583                             ISL_SURF_USAGE_MCS_BIT)) &&
1584        !pitch_in_range(row_pitch_tl, RENDER_SURFACE_STATE_AuxiliarySurfacePitch_bits(dev->info)))
1585       return false;
1586 
1587    if ((surf_info->usage & ISL_SURF_USAGE_DEPTH_BIT) &&
1588        !pitch_in_range(row_pitch_B, _3DSTATE_DEPTH_BUFFER_SurfacePitch_bits(dev->info)))
1589       return false;
1590 
1591    if ((surf_info->usage & ISL_SURF_USAGE_HIZ_BIT) &&
1592        !pitch_in_range(row_pitch_B, _3DSTATE_HIER_DEPTH_BUFFER_SurfacePitch_bits(dev->info)))
1593       return false;
1594 
1595    const uint32_t stencil_pitch_bits = dev->use_separate_stencil ?
1596       _3DSTATE_STENCIL_BUFFER_SurfacePitch_bits(dev->info) :
1597       _3DSTATE_DEPTH_BUFFER_SurfacePitch_bits(dev->info);
1598 
1599    if ((surf_info->usage & ISL_SURF_USAGE_STENCIL_BIT) &&
1600        !pitch_in_range(row_pitch_B, stencil_pitch_bits))
1601       return false;
1602 
1603  done:
1604    *out_row_pitch_B = row_pitch_B;
1605    return true;
1606 }
1607 
1608 bool
isl_surf_init_s(const struct isl_device * dev,struct isl_surf * surf,const struct isl_surf_init_info * restrict info)1609 isl_surf_init_s(const struct isl_device *dev,
1610                 struct isl_surf *surf,
1611                 const struct isl_surf_init_info *restrict info)
1612 {
1613    const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
1614 
1615    const struct isl_extent4d logical_level0_px = {
1616       .w = info->width,
1617       .h = info->height,
1618       .d = info->depth,
1619       .a = info->array_len,
1620    };
1621 
1622    enum isl_tiling tiling;
1623    if (!isl_surf_choose_tiling(dev, info, &tiling))
1624       return false;
1625 
1626    struct isl_tile_info tile_info;
1627    isl_tiling_get_info(tiling, fmtl->bpb, &tile_info);
1628 
1629    const enum isl_dim_layout dim_layout =
1630       isl_surf_choose_dim_layout(dev, info->dim, tiling, info->usage);
1631 
1632    enum isl_msaa_layout msaa_layout;
1633    if (!isl_choose_msaa_layout(dev, info, tiling, &msaa_layout))
1634        return false;
1635 
1636    struct isl_extent3d image_align_el;
1637    isl_choose_image_alignment_el(dev, info, tiling, dim_layout, msaa_layout,
1638                                  &image_align_el);
1639 
1640    struct isl_extent3d image_align_sa =
1641       isl_extent3d_el_to_sa(info->format, image_align_el);
1642 
1643    struct isl_extent4d phys_level0_sa;
1644    isl_calc_phys_level0_extent_sa(dev, info, dim_layout, tiling, msaa_layout,
1645                                   &phys_level0_sa);
1646 
1647    enum isl_array_pitch_span array_pitch_span =
1648       isl_choose_array_pitch_span(dev, info, dim_layout, &phys_level0_sa);
1649 
1650    uint32_t array_pitch_el_rows;
1651    struct isl_extent2d phys_total_el;
1652    isl_calc_phys_total_extent_el(dev, info, &tile_info,
1653                                  dim_layout, msaa_layout,
1654                                  &image_align_sa, &phys_level0_sa,
1655                                  array_pitch_span, &array_pitch_el_rows,
1656                                  &phys_total_el);
1657 
1658    uint32_t row_pitch_B;
1659    if (!isl_calc_row_pitch(dev, info, &tile_info, dim_layout,
1660                            &phys_total_el, &row_pitch_B))
1661       return false;
1662 
1663    uint32_t base_alignment_B;
1664    uint64_t size_B;
1665    if (tiling == ISL_TILING_LINEAR) {
1666       size_B = (uint64_t) row_pitch_B * phys_total_el.h;
1667 
1668       /* From the Broadwell PRM Vol 2d, RENDER_SURFACE_STATE::SurfaceBaseAddress:
1669        *
1670        *    "The Base Address for linear render target surfaces and surfaces
1671        *    accessed with the typed surface read/write data port messages must
1672        *    be element-size aligned, for non-YUV surface formats, or a
1673        *    multiple of 2 element-sizes for YUV surface formats. Other linear
1674        *    surfaces have no alignment requirements (byte alignment is
1675        *    sufficient.)"
1676        */
1677       base_alignment_B = MAX(1, info->min_alignment_B);
1678       if (info->usage & ISL_SURF_USAGE_RENDER_TARGET_BIT) {
1679          if (isl_format_is_yuv(info->format)) {
1680             base_alignment_B = MAX(base_alignment_B, fmtl->bpb / 4);
1681          } else {
1682             base_alignment_B = MAX(base_alignment_B, fmtl->bpb / 8);
1683          }
1684       }
1685       base_alignment_B = isl_round_up_to_power_of_two(base_alignment_B);
1686 
1687       /* From the Skylake PRM Vol 2c, PLANE_STRIDE::Stride:
1688        *
1689        *     "For Linear memory, this field specifies the stride in chunks of
1690        *     64 bytes (1 cache line)."
1691        */
1692       if (isl_surf_usage_is_display(info->usage))
1693          base_alignment_B = MAX(base_alignment_B, 64);
1694    } else {
1695       const uint32_t total_h_tl =
1696          isl_align_div(phys_total_el.h, tile_info.logical_extent_el.height);
1697 
1698       size_B = (uint64_t) total_h_tl * tile_info.phys_extent_B.height * row_pitch_B;
1699 
1700       const uint32_t tile_size_B = tile_info.phys_extent_B.width *
1701                                    tile_info.phys_extent_B.height;
1702       assert(isl_is_pow2(info->min_alignment_B) && isl_is_pow2(tile_size_B));
1703       base_alignment_B = MAX(info->min_alignment_B, tile_size_B);
1704 
1705       /* The diagram in the Bspec section Memory Compression - Gen12, shows
1706        * that the CCS is indexed in 256B chunks. However, the
1707        * PLANE_AUX_DIST::Auxiliary Surface Distance field is in units of 4K
1708        * pages. We currently don't assign the usage field like we do for main
1709        * surfaces, so just use 4K for now.
1710        */
1711       if (tiling == ISL_TILING_GEN12_CCS)
1712          base_alignment_B = MAX(base_alignment_B, 4096);
1713 
1714       /* Gen12+ requires that images be 64K-aligned if they're going to used
1715        * with CCS.  This is because the Aux translation table maps main
1716        * surface addresses to aux addresses at a 64K (in the main surface)
1717        * granularity.  Because we don't know for sure in ISL if a surface will
1718        * use CCS, we have to guess based on the DISABLE_AUX usage bit.  The
1719        * one thing we do know is that we haven't enable CCS on linear images
1720        * yet so we can avoid the extra alignment there.
1721        */
1722       if (ISL_DEV_GEN(dev) >= 12 &&
1723           !(info->usage & ISL_SURF_USAGE_DISABLE_AUX_BIT)) {
1724          base_alignment_B = MAX(base_alignment_B, 64 * 1024);
1725       }
1726    }
1727 
1728    if (ISL_DEV_GEN(dev) < 9) {
1729       /* From the Broadwell PRM Vol 5, Surface Layout:
1730        *
1731        *    "In addition to restrictions on maximum height, width, and depth,
1732        *     surfaces are also restricted to a maximum size in bytes. This
1733        *     maximum is 2 GB for all products and all surface types."
1734        *
1735        * This comment is applicable to all Pre-gen9 platforms.
1736        */
1737       if (size_B > (uint64_t) 1 << 31)
1738          return false;
1739    } else if (ISL_DEV_GEN(dev) < 11) {
1740       /* From the Skylake PRM Vol 5, Maximum Surface Size in Bytes:
1741        *    "In addition to restrictions on maximum height, width, and depth,
1742        *     surfaces are also restricted to a maximum size of 2^38 bytes.
1743        *     All pixels within the surface must be contained within 2^38 bytes
1744        *     of the base address."
1745        */
1746       if (size_B > (uint64_t) 1 << 38)
1747          return false;
1748    } else {
1749       /* gen11+ platforms raised this limit to 2^44 bytes. */
1750       if (size_B > (uint64_t) 1 << 44)
1751          return false;
1752    }
1753 
1754    *surf = (struct isl_surf) {
1755       .dim = info->dim,
1756       .dim_layout = dim_layout,
1757       .msaa_layout = msaa_layout,
1758       .tiling = tiling,
1759       .format = info->format,
1760 
1761       .levels = info->levels,
1762       .samples = info->samples,
1763 
1764       .image_alignment_el = image_align_el,
1765       .logical_level0_px = logical_level0_px,
1766       .phys_level0_sa = phys_level0_sa,
1767 
1768       .size_B = size_B,
1769       .alignment_B = base_alignment_B,
1770       .row_pitch_B = row_pitch_B,
1771       .array_pitch_el_rows = array_pitch_el_rows,
1772       .array_pitch_span = array_pitch_span,
1773 
1774       .usage = info->usage,
1775    };
1776 
1777    return true;
1778 }
1779 
1780 void
isl_surf_get_tile_info(const struct isl_surf * surf,struct isl_tile_info * tile_info)1781 isl_surf_get_tile_info(const struct isl_surf *surf,
1782                        struct isl_tile_info *tile_info)
1783 {
1784    const struct isl_format_layout *fmtl = isl_format_get_layout(surf->format);
1785    isl_tiling_get_info(surf->tiling, fmtl->bpb, tile_info);
1786 }
1787 
1788 bool
isl_surf_get_hiz_surf(const struct isl_device * dev,const struct isl_surf * surf,struct isl_surf * hiz_surf)1789 isl_surf_get_hiz_surf(const struct isl_device *dev,
1790                       const struct isl_surf *surf,
1791                       struct isl_surf *hiz_surf)
1792 {
1793    assert(ISL_DEV_GEN(dev) >= 5 && ISL_DEV_USE_SEPARATE_STENCIL(dev));
1794 
1795    if (!isl_surf_usage_is_depth(surf->usage))
1796       return false;
1797 
1798    /* HiZ only works with Y-tiled depth buffers */
1799    if (!isl_tiling_is_any_y(surf->tiling))
1800       return false;
1801 
1802    /* On SNB+, compressed depth buffers cannot be interleaved with stencil. */
1803    switch (surf->format) {
1804    case ISL_FORMAT_R24_UNORM_X8_TYPELESS:
1805       if (isl_surf_usage_is_depth_and_stencil(surf->usage)) {
1806          assert(ISL_DEV_GEN(dev) == 5);
1807          unreachable("This should work, but is untested");
1808       }
1809       /* Fall through */
1810    case ISL_FORMAT_R16_UNORM:
1811    case ISL_FORMAT_R32_FLOAT:
1812       break;
1813    case ISL_FORMAT_R32_FLOAT_X8X24_TYPELESS:
1814       if (ISL_DEV_GEN(dev) == 5) {
1815          assert(isl_surf_usage_is_depth_and_stencil(surf->usage));
1816          unreachable("This should work, but is untested");
1817       }
1818       /* Fall through */
1819    default:
1820       return false;
1821    }
1822 
1823    /* Multisampled depth is always interleaved */
1824    assert(surf->msaa_layout == ISL_MSAA_LAYOUT_NONE ||
1825           surf->msaa_layout == ISL_MSAA_LAYOUT_INTERLEAVED);
1826 
1827    /* From the Broadwell PRM Vol. 7, "Hierarchical Depth Buffer":
1828     *
1829     *    "The Surface Type, Height, Width, Depth, Minimum Array Element, Render
1830     *    Target View Extent, and Depth Coordinate Offset X/Y of the
1831     *    hierarchical depth buffer are inherited from the depth buffer. The
1832     *    height and width of the hierarchical depth buffer that must be
1833     *    allocated are computed by the following formulas, where HZ is the
1834     *    hierarchical depth buffer and Z is the depth buffer. The Z_Height,
1835     *    Z_Width, and Z_Depth values given in these formulas are those present
1836     *    in 3DSTATE_DEPTH_BUFFER incremented by one.
1837     *
1838     *    "The value of Z_Height and Z_Width must each be multiplied by 2 before
1839     *    being applied to the table below if Number of Multisamples is set to
1840     *    NUMSAMPLES_4. The value of Z_Height must be multiplied by 2 and
1841     *    Z_Width must be multiplied by 4 before being applied to the table
1842     *    below if Number of Multisamples is set to NUMSAMPLES_8."
1843     *
1844     * In the Sky Lake PRM, the second paragraph is replaced with this:
1845     *
1846     *    "The Z_Height and Z_Width values must equal those present in
1847     *    3DSTATE_DEPTH_BUFFER incremented by one."
1848     *
1849     * In other words, on Sandy Bridge through Broadwell, each 128-bit HiZ
1850     * block corresponds to a region of 8x4 samples in the primary depth
1851     * surface.  On Sky Lake, on the other hand, each HiZ block corresponds to
1852     * a region of 8x4 pixels in the primary depth surface regardless of the
1853     * number of samples.  The dimensions of a HiZ block in both pixels and
1854     * samples are given in the table below:
1855     *
1856     *                    | SNB - BDW |     SKL+
1857     *              ------+-----------+-------------
1858     *                1x  |  8 x 4 sa |   8 x 4 sa
1859     *               MSAA |  8 x 4 px |   8 x 4 px
1860     *              ------+-----------+-------------
1861     *                2x  |  8 x 4 sa |  16 x 4 sa
1862     *               MSAA |  4 x 4 px |   8 x 4 px
1863     *              ------+-----------+-------------
1864     *                4x  |  8 x 4 sa |  16 x 8 sa
1865     *               MSAA |  4 x 2 px |   8 x 4 px
1866     *              ------+-----------+-------------
1867     *                8x  |  8 x 4 sa |  32 x 8 sa
1868     *               MSAA |  2 x 2 px |   8 x 4 px
1869     *              ------+-----------+-------------
1870     *               16x  |    N/A    | 32 x 16 sa
1871     *               MSAA |    N/A    |  8 x  4 px
1872     *              ------+-----------+-------------
1873     *
1874     * There are a number of different ways that this discrepency could be
1875     * handled.  The way we have chosen is to simply make MSAA HiZ have the
1876     * same number of samples as the parent surface pre-Sky Lake and always be
1877     * single-sampled on Sky Lake and above.  Since the block sizes of
1878     * compressed formats are given in samples, this neatly handles everything
1879     * without the need for additional HiZ formats with different block sizes
1880     * on SKL+.
1881     */
1882    const unsigned samples = ISL_DEV_GEN(dev) >= 9 ? 1 : surf->samples;
1883 
1884    return isl_surf_init(dev, hiz_surf,
1885                         .dim = surf->dim,
1886                         .format = ISL_FORMAT_HIZ,
1887                         .width = surf->logical_level0_px.width,
1888                         .height = surf->logical_level0_px.height,
1889                         .depth = surf->logical_level0_px.depth,
1890                         .levels = surf->levels,
1891                         .array_len = surf->logical_level0_px.array_len,
1892                         .samples = samples,
1893                         .usage = ISL_SURF_USAGE_HIZ_BIT,
1894                         .tiling_flags = ISL_TILING_HIZ_BIT);
1895 }
1896 
1897 bool
isl_surf_get_mcs_surf(const struct isl_device * dev,const struct isl_surf * surf,struct isl_surf * mcs_surf)1898 isl_surf_get_mcs_surf(const struct isl_device *dev,
1899                       const struct isl_surf *surf,
1900                       struct isl_surf *mcs_surf)
1901 {
1902    /* It must be multisampled with an array layout */
1903    if (surf->msaa_layout != ISL_MSAA_LAYOUT_ARRAY)
1904       return false;
1905 
1906    if (mcs_surf->size_B > 0)
1907       return false;
1908 
1909    /* The following are true of all multisampled surfaces */
1910    assert(surf->samples > 1);
1911    assert(surf->dim == ISL_SURF_DIM_2D);
1912    assert(surf->levels == 1);
1913    assert(surf->logical_level0_px.depth == 1);
1914 
1915    /* From the Ivy Bridge PRM, Vol4 Part1 p77 ("MCS Enable"):
1916     *
1917     *   This field must be set to 0 for all SINT MSRTs when all RT channels
1918     *   are not written
1919     *
1920     * In practice this means that we have to disable MCS for all signed
1921     * integer MSAA buffers.  The alternative, to disable MCS only when one
1922     * of the render target channels is disabled, is impractical because it
1923     * would require converting between CMS and UMS MSAA layouts on the fly,
1924     * which is expensive.
1925     */
1926    if (ISL_DEV_GEN(dev) == 7 && isl_format_has_sint_channel(surf->format))
1927       return false;
1928 
1929    /* The "Auxiliary Surface Pitch" field in RENDER_SURFACE_STATE is only 9
1930     * bits which means the maximum pitch of a compression surface is 512
1931     * tiles or 64KB (since MCS is always Y-tiled).  Since a 16x MCS buffer is
1932     * 64bpp, this gives us a maximum width of 8192 pixels.  We can create
1933     * larger multisampled surfaces, we just can't compress them.   For 2x, 4x,
1934     * and 8x, we have enough room for the full 16k supported by the hardware.
1935     */
1936    if (surf->samples == 16 && surf->logical_level0_px.width > 8192)
1937       return false;
1938 
1939    enum isl_format mcs_format;
1940    switch (surf->samples) {
1941    case 2:  mcs_format = ISL_FORMAT_MCS_2X;  break;
1942    case 4:  mcs_format = ISL_FORMAT_MCS_4X;  break;
1943    case 8:  mcs_format = ISL_FORMAT_MCS_8X;  break;
1944    case 16: mcs_format = ISL_FORMAT_MCS_16X; break;
1945    default:
1946       unreachable("Invalid sample count");
1947    }
1948 
1949    return isl_surf_init(dev, mcs_surf,
1950                         .dim = ISL_SURF_DIM_2D,
1951                         .format = mcs_format,
1952                         .width = surf->logical_level0_px.width,
1953                         .height = surf->logical_level0_px.height,
1954                         .depth = 1,
1955                         .levels = 1,
1956                         .array_len = surf->logical_level0_px.array_len,
1957                         .samples = 1, /* MCS surfaces are really single-sampled */
1958                         .usage = ISL_SURF_USAGE_MCS_BIT,
1959                         .tiling_flags = ISL_TILING_Y0_BIT);
1960 }
1961 
1962 bool
isl_surf_supports_ccs(const struct isl_device * dev,const struct isl_surf * surf)1963 isl_surf_supports_ccs(const struct isl_device *dev,
1964                       const struct isl_surf *surf)
1965 {
1966    /* CCS support does not exist prior to Gen7 */
1967    if (ISL_DEV_GEN(dev) <= 6)
1968       return false;
1969 
1970    if (surf->usage & ISL_SURF_USAGE_DISABLE_AUX_BIT)
1971       return false;
1972 
1973    if (isl_format_is_compressed(surf->format))
1974       return false;
1975 
1976    if (!isl_is_pow2(isl_format_get_layout(surf->format)->bpb))
1977       return false;
1978 
1979    /* From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render
1980     * Target(s)", beneath the "Fast Color Clear" bullet (p326):
1981     *
1982     *     - Support is limited to tiled render targets.
1983     *
1984     * From the Skylake documentation, it is made clear that X-tiling is no
1985     * longer supported:
1986     *
1987     *     - MCS and Lossless compression is supported for
1988     *       TiledY/TileYs/TileYf non-MSRTs only.
1989     *
1990     * From the BSpec (44930) for Gen12:
1991     *
1992     *    Linear CCS is only allowed for Untyped Buffers but only via HDC
1993     *    Data-Port messages.
1994     *
1995     * We never use untyped messages on surfaces created by ISL on Gen9+ so
1996     * this means linear is out on Gen12+ as well.
1997     */
1998    if (surf->tiling == ISL_TILING_LINEAR)
1999       return false;
2000 
2001    if (ISL_DEV_GEN(dev) >= 12) {
2002       if (isl_surf_usage_is_stencil(surf->usage) && surf->samples > 1)
2003          return false;
2004 
2005       /* On Gen12, all CCS-compressed surface pitches must be multiples of
2006        * 512B.
2007        */
2008       if (surf->row_pitch_B % 512 != 0)
2009          return false;
2010 
2011       /* According to GEN:BUG:1406738321, 3D textures need a blit to a new
2012        * surface in order to perform a resolve. For now, just disable CCS.
2013        */
2014       if (surf->dim == ISL_SURF_DIM_3D) {
2015          isl_finishme("%s:%s: CCS for 3D textures is disabled, but a workaround"
2016                       " is available.", __FILE__, __func__);
2017          return false;
2018       }
2019 
2020       /* GEN:BUG:1207137018
2021        *
2022        * TODO: implement following workaround currently covered by the
2023        * restriction above. If following conditions are met:
2024        *
2025        *    - RENDER_SURFACE_STATE.Surface Type == 3D
2026        *    - RENDER_SURFACE_STATE.Auxiliary Surface Mode != AUX_NONE
2027        *    - RENDER_SURFACE_STATE.Tiled ResourceMode is TYF or TYS
2028        *
2029        * Set the value of RENDER_SURFACE_STATE.Mip Tail Start LOD to a mip
2030        * that larger than those present in the surface (i.e. 15)
2031        */
2032 
2033       /* TODO: Handle the other tiling formats */
2034       if (surf->tiling != ISL_TILING_Y0)
2035          return false;
2036    } else {
2037       /* ISL_DEV_GEN(dev) < 12 */
2038       if (surf->samples > 1)
2039          return false;
2040 
2041       /* CCS is only for color images on Gen7-11 */
2042       if (isl_surf_usage_is_depth_or_stencil(surf->usage))
2043          return false;
2044 
2045       /* The PRM doesn't say this explicitly, but fast-clears don't appear to
2046        * work for 3D textures until gen9 where the layout of 3D textures
2047        * changes to match 2D array textures.
2048        */
2049       if (ISL_DEV_GEN(dev) <= 8 && surf->dim != ISL_SURF_DIM_2D)
2050          return false;
2051 
2052       /* From the HSW PRM Volume 7: 3D-Media-GPGPU, page 652 (Color Clear of
2053        * Non-MultiSampler Render Target Restrictions):
2054        *
2055        *    "Support is for non-mip-mapped and non-array surface types only."
2056        *
2057        * This restriction is lifted on gen8+.  Technically, it may be possible
2058        * to create a CCS for an arrayed or mipmapped image and only enable
2059        * CCS_D when rendering to the base slice.  However, there is no
2060        * documentation tell us what the hardware would do in that case or what
2061        * it does if you walk off the bases slice.  (Does it ignore CCS or does
2062        * it start scribbling over random memory?)  We play it safe and just
2063        * follow the docs and don't allow CCS_D for arrayed or mip-mapped
2064        * surfaces.
2065        */
2066       if (ISL_DEV_GEN(dev) <= 7 &&
2067           (surf->levels > 1 || surf->logical_level0_px.array_len > 1))
2068          return false;
2069 
2070       /* From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render
2071        * Target(s)", beneath the "Fast Color Clear" bullet (p326):
2072        *
2073        *     - MCS buffer for non-MSRT is supported only for RT formats 32bpp,
2074        *       64bpp, and 128bpp.
2075        */
2076       if (isl_format_get_layout(surf->format)->bpb < 32)
2077          return false;
2078 
2079       /* From the Skylake documentation, it is made clear that X-tiling is no
2080        * longer supported:
2081        *
2082        *     - MCS and Lossless compression is supported for
2083        *     TiledY/TileYs/TileYf non-MSRTs only.
2084        */
2085       if (ISL_DEV_GEN(dev) >= 9 && !isl_tiling_is_any_y(surf->tiling))
2086          return false;
2087    }
2088 
2089    return true;
2090 }
2091 
2092 bool
isl_surf_get_ccs_surf(const struct isl_device * dev,const struct isl_surf * surf,struct isl_surf * aux_surf,struct isl_surf * extra_aux_surf,uint32_t row_pitch_B)2093 isl_surf_get_ccs_surf(const struct isl_device *dev,
2094                       const struct isl_surf *surf,
2095                       struct isl_surf *aux_surf,
2096                       struct isl_surf *extra_aux_surf,
2097                       uint32_t row_pitch_B)
2098 {
2099    assert(aux_surf);
2100 
2101    /* An uninitialized surface is needed to get a CCS surface. */
2102    if (aux_surf->size_B > 0 &&
2103        (extra_aux_surf == NULL || extra_aux_surf->size_B > 0)) {
2104       return false;
2105    }
2106 
2107    /* A surface can't have two CCS surfaces. */
2108    if (aux_surf->usage & ISL_SURF_USAGE_CCS_BIT)
2109       return false;
2110 
2111    if (!isl_surf_supports_ccs(dev, surf))
2112       return false;
2113 
2114    if (ISL_DEV_GEN(dev) >= 12) {
2115       enum isl_format ccs_format;
2116       switch (isl_format_get_layout(surf->format)->bpb) {
2117       case 8:     ccs_format = ISL_FORMAT_GEN12_CCS_8BPP_Y0;    break;
2118       case 16:    ccs_format = ISL_FORMAT_GEN12_CCS_16BPP_Y0;   break;
2119       case 32:    ccs_format = ISL_FORMAT_GEN12_CCS_32BPP_Y0;   break;
2120       case 64:    ccs_format = ISL_FORMAT_GEN12_CCS_64BPP_Y0;   break;
2121       case 128:   ccs_format = ISL_FORMAT_GEN12_CCS_128BPP_Y0;  break;
2122       default:
2123          return false;
2124       }
2125 
2126       /* On Gen12, the CCS is a scaled-down version of the main surface. We
2127        * model this as the CCS compressing a 2D-view of the entire surface.
2128        */
2129       struct isl_surf *ccs_surf =
2130          aux_surf->size_B > 0 ? extra_aux_surf : aux_surf;
2131       const bool ok =
2132          isl_surf_init(dev, ccs_surf,
2133                        .dim = ISL_SURF_DIM_2D,
2134                        .format = ccs_format,
2135                        .width = isl_surf_get_row_pitch_el(surf),
2136                        .height = surf->size_B / surf->row_pitch_B,
2137                        .depth = 1,
2138                        .levels = 1,
2139                        .array_len = 1,
2140                        .samples = 1,
2141                        .row_pitch_B = row_pitch_B,
2142                        .usage = ISL_SURF_USAGE_CCS_BIT,
2143                        .tiling_flags = ISL_TILING_GEN12_CCS_BIT);
2144       assert(!ok || ccs_surf->size_B == surf->size_B / 256);
2145       return ok;
2146    } else {
2147       enum isl_format ccs_format;
2148       if (ISL_DEV_GEN(dev) >= 9) {
2149          switch (isl_format_get_layout(surf->format)->bpb) {
2150          case 32:    ccs_format = ISL_FORMAT_GEN9_CCS_32BPP;   break;
2151          case 64:    ccs_format = ISL_FORMAT_GEN9_CCS_64BPP;   break;
2152          case 128:   ccs_format = ISL_FORMAT_GEN9_CCS_128BPP;  break;
2153          default:    unreachable("Unsupported CCS format");
2154             return false;
2155          }
2156       } else if (surf->tiling == ISL_TILING_Y0) {
2157          switch (isl_format_get_layout(surf->format)->bpb) {
2158          case 32:    ccs_format = ISL_FORMAT_GEN7_CCS_32BPP_Y;    break;
2159          case 64:    ccs_format = ISL_FORMAT_GEN7_CCS_64BPP_Y;    break;
2160          case 128:   ccs_format = ISL_FORMAT_GEN7_CCS_128BPP_Y;   break;
2161          default:    unreachable("Unsupported CCS format");
2162          }
2163       } else if (surf->tiling == ISL_TILING_X) {
2164          switch (isl_format_get_layout(surf->format)->bpb) {
2165          case 32:    ccs_format = ISL_FORMAT_GEN7_CCS_32BPP_X;    break;
2166          case 64:    ccs_format = ISL_FORMAT_GEN7_CCS_64BPP_X;    break;
2167          case 128:   ccs_format = ISL_FORMAT_GEN7_CCS_128BPP_X;   break;
2168          default:    unreachable("Unsupported CCS format");
2169          }
2170       } else {
2171          unreachable("Invalid tiling format");
2172       }
2173 
2174       return isl_surf_init(dev, aux_surf,
2175                            .dim = surf->dim,
2176                            .format = ccs_format,
2177                            .width = surf->logical_level0_px.width,
2178                            .height = surf->logical_level0_px.height,
2179                            .depth = surf->logical_level0_px.depth,
2180                            .levels = surf->levels,
2181                            .array_len = surf->logical_level0_px.array_len,
2182                            .samples = 1,
2183                            .row_pitch_B = row_pitch_B,
2184                            .usage = ISL_SURF_USAGE_CCS_BIT,
2185                            .tiling_flags = ISL_TILING_CCS_BIT);
2186    }
2187 }
2188 
/**
 * Dispatch a call to the per-generation implementation of @func.
 *
 * Expands to a switch on ISL_DEV_GEN(dev) that invokes isl_genN_<func> with
 * the remaining arguments.  Gen4 devices for which ISL_DEV_IS_G4X() is true
 * are routed to the gen5 variant, and gen7 devices for which
 * ISL_DEV_IS_HASWELL() is true are routed to the gen75 variant.  Any other
 * generation trips an assert.
 *
 * The expansion is wrapped in do { } while (0) so that the macro behaves as
 * a single statement: it must be followed by a semicolon and is safe to use
 * as the body of an unbraced if/else.  @dev appears more than once in the
 * expansion, so it must not have side effects.
 */
#define isl_genX_call(dev, func, ...)                 \
   do {                                               \
      switch (ISL_DEV_GEN(dev)) {                     \
      case 4:                                         \
         /* G45 surface state is the same as gen5 */  \
         if (ISL_DEV_IS_G4X(dev)) {                   \
            isl_gen5_##func(__VA_ARGS__);             \
         } else {                                     \
            isl_gen4_##func(__VA_ARGS__);             \
         }                                            \
         break;                                       \
      case 5:                                         \
         isl_gen5_##func(__VA_ARGS__);                \
         break;                                       \
      case 6:                                         \
         isl_gen6_##func(__VA_ARGS__);                \
         break;                                       \
      case 7:                                         \
         if (ISL_DEV_IS_HASWELL(dev)) {               \
            isl_gen75_##func(__VA_ARGS__);            \
         } else {                                     \
            isl_gen7_##func(__VA_ARGS__);             \
         }                                            \
         break;                                       \
      case 8:                                         \
         isl_gen8_##func(__VA_ARGS__);                \
         break;                                       \
      case 9:                                         \
         isl_gen9_##func(__VA_ARGS__);                \
         break;                                       \
      case 11:                                        \
         isl_gen11_##func(__VA_ARGS__);               \
         break;                                       \
      case 12:                                        \
         isl_gen12_##func(__VA_ARGS__);               \
         break;                                       \
      default:                                        \
         assert(!"Unknown hardware generation");      \
      }                                               \
   } while (0)
2227 
2228 void
isl_surf_fill_state_s(const struct isl_device * dev,void * state,const struct isl_surf_fill_state_info * restrict info)2229 isl_surf_fill_state_s(const struct isl_device *dev, void *state,
2230                       const struct isl_surf_fill_state_info *restrict info)
2231 {
2232 #ifndef NDEBUG
2233    isl_surf_usage_flags_t _base_usage =
2234       info->view->usage & (ISL_SURF_USAGE_RENDER_TARGET_BIT |
2235                            ISL_SURF_USAGE_TEXTURE_BIT |
2236                            ISL_SURF_USAGE_STORAGE_BIT);
2237    /* They may only specify one of the above bits at a time */
2238    assert(__builtin_popcount(_base_usage) == 1);
2239    /* The only other allowed bit is ISL_SURF_USAGE_CUBE_BIT */
2240    assert((info->view->usage & ~ISL_SURF_USAGE_CUBE_BIT) == _base_usage);
2241 #endif
2242 
2243    if (info->surf->dim == ISL_SURF_DIM_3D) {
2244       assert(info->view->base_array_layer + info->view->array_len <=
2245              info->surf->logical_level0_px.depth);
2246    } else {
2247       assert(info->view->base_array_layer + info->view->array_len <=
2248              info->surf->logical_level0_px.array_len);
2249    }
2250 
2251    isl_genX_call(dev, surf_fill_state_s, dev, state, info);
2252 }
2253 
/**
 * Forward a buffer-state request to the buffer_fill_state_s implementation
 * matching the device's hardware generation.
 *
 * @param dev    device used to select the per-generation implementation
 * @param state  destination passed through unchanged to the implementation
 * @param info   request description passed through unchanged
 */
void
isl_buffer_fill_state_s(const struct isl_device *dev, void *state,
                        const struct isl_buffer_fill_state_info *restrict info)
{
   isl_genX_call(dev, buffer_fill_state_s, dev, state, info);
}
2260 
/**
 * Forward a null-state request to the null_fill_state implementation
 * matching the device's hardware generation.
 *
 * @param dev    device used only to select the per-generation implementation
 * @param state  destination passed through unchanged to the implementation
 * @param size   extent passed through unchanged to the implementation
 */
void
isl_null_fill_state(const struct isl_device *dev, void *state,
                    struct isl_extent3d size)
{
   /* Unlike the other fill helpers, the per-generation function does not
    * take the device as an argument.
    */
   isl_genX_call(dev, null_fill_state, state, size);
}
2267 
2268 void
isl_emit_depth_stencil_hiz_s(const struct isl_device * dev,void * batch,const struct isl_depth_stencil_hiz_emit_info * restrict info)2269 isl_emit_depth_stencil_hiz_s(const struct isl_device *dev, void *batch,
2270                              const struct isl_depth_stencil_hiz_emit_info *restrict info)
2271 {
2272    if (info->depth_surf && info->stencil_surf) {
2273       if (!dev->info->has_hiz_and_separate_stencil) {
2274          assert(info->depth_surf == info->stencil_surf);
2275          assert(info->depth_address == info->stencil_address);
2276       }
2277       assert(info->depth_surf->dim == info->stencil_surf->dim);
2278    }
2279 
2280    if (info->depth_surf) {
2281       assert((info->depth_surf->usage & ISL_SURF_USAGE_DEPTH_BIT));
2282       if (info->depth_surf->dim == ISL_SURF_DIM_3D) {
2283          assert(info->view->base_array_layer + info->view->array_len <=
2284                 info->depth_surf->logical_level0_px.depth);
2285       } else {
2286          assert(info->view->base_array_layer + info->view->array_len <=
2287                 info->depth_surf->logical_level0_px.array_len);
2288       }
2289    }
2290 
2291    if (info->stencil_surf) {
2292       assert((info->stencil_surf->usage & ISL_SURF_USAGE_STENCIL_BIT));
2293       if (info->stencil_surf->dim == ISL_SURF_DIM_3D) {
2294          assert(info->view->base_array_layer + info->view->array_len <=
2295                 info->stencil_surf->logical_level0_px.depth);
2296       } else {
2297          assert(info->view->base_array_layer + info->view->array_len <=
2298                 info->stencil_surf->logical_level0_px.array_len);
2299       }
2300    }
2301 
2302    isl_genX_call(dev, emit_depth_stencil_hiz_s, dev, batch, info);
2303 }
2304 
2305 /**
2306  * A variant of isl_surf_get_image_offset_sa() specific to
2307  * ISL_DIM_LAYOUT_GEN4_2D.
2308  */
2309 static void
get_image_offset_sa_gen4_2d(const struct isl_surf * surf,uint32_t level,uint32_t logical_array_layer,uint32_t * x_offset_sa,uint32_t * y_offset_sa)2310 get_image_offset_sa_gen4_2d(const struct isl_surf *surf,
2311                             uint32_t level, uint32_t logical_array_layer,
2312                             uint32_t *x_offset_sa,
2313                             uint32_t *y_offset_sa)
2314 {
2315    assert(level < surf->levels);
2316    if (surf->dim == ISL_SURF_DIM_3D)
2317       assert(logical_array_layer < surf->logical_level0_px.depth);
2318    else
2319       assert(logical_array_layer < surf->logical_level0_px.array_len);
2320 
2321    const struct isl_extent3d image_align_sa =
2322       isl_surf_get_image_alignment_sa(surf);
2323 
2324    const uint32_t W0 = surf->phys_level0_sa.width;
2325    const uint32_t H0 = surf->phys_level0_sa.height;
2326 
2327    const uint32_t phys_layer = logical_array_layer *
2328       (surf->msaa_layout == ISL_MSAA_LAYOUT_ARRAY ? surf->samples : 1);
2329 
2330    uint32_t x = 0;
2331    uint32_t y = phys_layer * isl_surf_get_array_pitch_sa_rows(surf);
2332 
2333    for (uint32_t l = 0; l < level; ++l) {
2334       if (l == 1) {
2335          uint32_t W = isl_minify(W0, l);
2336          x += isl_align_npot(W, image_align_sa.w);
2337       } else {
2338          uint32_t H = isl_minify(H0, l);
2339          y += isl_align_npot(H, image_align_sa.h);
2340       }
2341    }
2342 
2343    *x_offset_sa = x;
2344    *y_offset_sa = y;
2345 }
2346 
2347 /**
2348  * A variant of isl_surf_get_image_offset_sa() specific to
2349  * ISL_DIM_LAYOUT_GEN4_3D.
2350  */
2351 static void
get_image_offset_sa_gen4_3d(const struct isl_surf * surf,uint32_t level,uint32_t logical_z_offset_px,uint32_t * x_offset_sa,uint32_t * y_offset_sa)2352 get_image_offset_sa_gen4_3d(const struct isl_surf *surf,
2353                             uint32_t level, uint32_t logical_z_offset_px,
2354                             uint32_t *x_offset_sa,
2355                             uint32_t *y_offset_sa)
2356 {
2357    assert(level < surf->levels);
2358    if (surf->dim == ISL_SURF_DIM_3D) {
2359       assert(surf->phys_level0_sa.array_len == 1);
2360       assert(logical_z_offset_px < isl_minify(surf->phys_level0_sa.depth, level));
2361    } else {
2362       assert(surf->dim == ISL_SURF_DIM_2D);
2363       assert(surf->usage & ISL_SURF_USAGE_CUBE_BIT);
2364       assert(surf->phys_level0_sa.array_len == 6);
2365       assert(logical_z_offset_px < surf->phys_level0_sa.array_len);
2366    }
2367 
2368    const struct isl_extent3d image_align_sa =
2369       isl_surf_get_image_alignment_sa(surf);
2370 
2371    const uint32_t W0 = surf->phys_level0_sa.width;
2372    const uint32_t H0 = surf->phys_level0_sa.height;
2373    const uint32_t D0 = surf->phys_level0_sa.depth;
2374    const uint32_t AL = surf->phys_level0_sa.array_len;
2375 
2376    uint32_t x = 0;
2377    uint32_t y = 0;
2378 
2379    for (uint32_t l = 0; l < level; ++l) {
2380       const uint32_t level_h = isl_align_npot(isl_minify(H0, l), image_align_sa.h);
2381       const uint32_t level_d =
2382          isl_align_npot(surf->dim == ISL_SURF_DIM_3D ? isl_minify(D0, l) : AL,
2383                         image_align_sa.d);
2384       const uint32_t max_layers_vert = isl_align(level_d, 1u << l) / (1u << l);
2385 
2386       y += level_h * max_layers_vert;
2387    }
2388 
2389    const uint32_t level_w = isl_align_npot(isl_minify(W0, level), image_align_sa.w);
2390    const uint32_t level_h = isl_align_npot(isl_minify(H0, level), image_align_sa.h);
2391    const uint32_t level_d =
2392       isl_align_npot(surf->dim == ISL_SURF_DIM_3D ? isl_minify(D0, level) : AL,
2393                      image_align_sa.d);
2394 
2395    const uint32_t max_layers_horiz = MIN(level_d, 1u << level);
2396 
2397    x += level_w * (logical_z_offset_px % max_layers_horiz);
2398    y += level_h * (logical_z_offset_px / max_layers_horiz);
2399 
2400    *x_offset_sa = x;
2401    *y_offset_sa = y;
2402 }
2403 
2404 static void
get_image_offset_sa_gen6_stencil_hiz(const struct isl_surf * surf,uint32_t level,uint32_t logical_array_layer,uint32_t * x_offset_sa,uint32_t * y_offset_sa)2405 get_image_offset_sa_gen6_stencil_hiz(const struct isl_surf *surf,
2406                                      uint32_t level,
2407                                      uint32_t logical_array_layer,
2408                                      uint32_t *x_offset_sa,
2409                                      uint32_t *y_offset_sa)
2410 {
2411    assert(level < surf->levels);
2412    assert(surf->logical_level0_px.depth == 1);
2413    assert(logical_array_layer < surf->logical_level0_px.array_len);
2414 
2415    const struct isl_format_layout *fmtl = isl_format_get_layout(surf->format);
2416 
2417    const struct isl_extent3d image_align_sa =
2418       isl_surf_get_image_alignment_sa(surf);
2419 
2420    struct isl_tile_info tile_info;
2421    isl_tiling_get_info(surf->tiling, fmtl->bpb, &tile_info);
2422    const struct isl_extent2d tile_extent_sa = {
2423       .w = tile_info.logical_extent_el.w * fmtl->bw,
2424       .h = tile_info.logical_extent_el.h * fmtl->bh,
2425    };
2426    /* Tile size is a multiple of image alignment */
2427    assert(tile_extent_sa.w % image_align_sa.w == 0);
2428    assert(tile_extent_sa.h % image_align_sa.h == 0);
2429 
2430    const uint32_t W0 = surf->phys_level0_sa.w;
2431    const uint32_t H0 = surf->phys_level0_sa.h;
2432 
2433    /* Each image has the same height as LOD0 because the hardware thinks
2434     * everything is LOD0
2435     */
2436    const uint32_t H = isl_align(H0, image_align_sa.h);
2437 
2438    /* Quick sanity check for consistency */
2439    if (surf->phys_level0_sa.array_len > 1)
2440       assert(surf->array_pitch_el_rows == isl_assert_div(H, fmtl->bh));
2441 
2442    uint32_t x = 0, y = 0;
2443    for (uint32_t l = 0; l < level; ++l) {
2444       const uint32_t W = isl_minify(W0, l);
2445 
2446       const uint32_t w = isl_align(W, tile_extent_sa.w);
2447       const uint32_t h = isl_align(H * surf->phys_level0_sa.a,
2448                                    tile_extent_sa.h);
2449 
2450       if (l == 0) {
2451          y += h;
2452       } else {
2453          x += w;
2454       }
2455    }
2456 
2457    y += H * logical_array_layer;
2458 
2459    *x_offset_sa = x;
2460    *y_offset_sa = y;
2461 }
2462 
2463 /**
2464  * A variant of isl_surf_get_image_offset_sa() specific to
2465  * ISL_DIM_LAYOUT_GEN9_1D.
2466  */
2467 static void
get_image_offset_sa_gen9_1d(const struct isl_surf * surf,uint32_t level,uint32_t layer,uint32_t * x_offset_sa,uint32_t * y_offset_sa)2468 get_image_offset_sa_gen9_1d(const struct isl_surf *surf,
2469                             uint32_t level, uint32_t layer,
2470                             uint32_t *x_offset_sa,
2471                             uint32_t *y_offset_sa)
2472 {
2473    assert(level < surf->levels);
2474    assert(layer < surf->phys_level0_sa.array_len);
2475    assert(surf->phys_level0_sa.height == 1);
2476    assert(surf->phys_level0_sa.depth == 1);
2477    assert(surf->samples == 1);
2478 
2479    const uint32_t W0 = surf->phys_level0_sa.width;
2480    const struct isl_extent3d image_align_sa =
2481       isl_surf_get_image_alignment_sa(surf);
2482 
2483    uint32_t x = 0;
2484 
2485    for (uint32_t l = 0; l < level; ++l) {
2486       uint32_t W = isl_minify(W0, l);
2487       uint32_t w = isl_align_npot(W, image_align_sa.w);
2488 
2489       x += w;
2490    }
2491 
2492    *x_offset_sa = x;
2493    *y_offset_sa = layer * isl_surf_get_array_pitch_sa_rows(surf);
2494 }
2495 
2496 /**
2497  * Calculate the offset, in units of surface samples, to a subimage in the
2498  * surface.
2499  *
2500  * @invariant level < surface levels
2501  * @invariant logical_array_layer < logical array length of surface
2502  * @invariant logical_z_offset_px < logical depth of surface at level
2503  */
2504 void
isl_surf_get_image_offset_sa(const struct isl_surf * surf,uint32_t level,uint32_t logical_array_layer,uint32_t logical_z_offset_px,uint32_t * x_offset_sa,uint32_t * y_offset_sa)2505 isl_surf_get_image_offset_sa(const struct isl_surf *surf,
2506                              uint32_t level,
2507                              uint32_t logical_array_layer,
2508                              uint32_t logical_z_offset_px,
2509                              uint32_t *x_offset_sa,
2510                              uint32_t *y_offset_sa)
2511 {
2512    assert(level < surf->levels);
2513    assert(logical_array_layer < surf->logical_level0_px.array_len);
2514    assert(logical_z_offset_px
2515           < isl_minify(surf->logical_level0_px.depth, level));
2516 
2517    switch (surf->dim_layout) {
2518    case ISL_DIM_LAYOUT_GEN9_1D:
2519       get_image_offset_sa_gen9_1d(surf, level, logical_array_layer,
2520                                   x_offset_sa, y_offset_sa);
2521       break;
2522    case ISL_DIM_LAYOUT_GEN4_2D:
2523       get_image_offset_sa_gen4_2d(surf, level, logical_array_layer
2524                                   + logical_z_offset_px,
2525                                   x_offset_sa, y_offset_sa);
2526       break;
2527    case ISL_DIM_LAYOUT_GEN4_3D:
2528       get_image_offset_sa_gen4_3d(surf, level, logical_array_layer +
2529                                   logical_z_offset_px,
2530                                   x_offset_sa, y_offset_sa);
2531       break;
2532    case ISL_DIM_LAYOUT_GEN6_STENCIL_HIZ:
2533       get_image_offset_sa_gen6_stencil_hiz(surf, level, logical_array_layer +
2534                                            logical_z_offset_px,
2535                                            x_offset_sa, y_offset_sa);
2536       break;
2537 
2538    default:
2539       unreachable("not reached");
2540    }
2541 }
2542 
2543 void
isl_surf_get_image_offset_el(const struct isl_surf * surf,uint32_t level,uint32_t logical_array_layer,uint32_t logical_z_offset_px,uint32_t * x_offset_el,uint32_t * y_offset_el)2544 isl_surf_get_image_offset_el(const struct isl_surf *surf,
2545                              uint32_t level,
2546                              uint32_t logical_array_layer,
2547                              uint32_t logical_z_offset_px,
2548                              uint32_t *x_offset_el,
2549                              uint32_t *y_offset_el)
2550 {
2551    const struct isl_format_layout *fmtl = isl_format_get_layout(surf->format);
2552 
2553    assert(level < surf->levels);
2554    assert(logical_array_layer < surf->logical_level0_px.array_len);
2555    assert(logical_z_offset_px
2556           < isl_minify(surf->logical_level0_px.depth, level));
2557 
2558    uint32_t x_offset_sa, y_offset_sa;
2559    isl_surf_get_image_offset_sa(surf, level,
2560                                 logical_array_layer,
2561                                 logical_z_offset_px,
2562                                 &x_offset_sa,
2563                                 &y_offset_sa);
2564 
2565    *x_offset_el = x_offset_sa / fmtl->bw;
2566    *y_offset_el = y_offset_sa / fmtl->bh;
2567 }
2568 
2569 void
isl_surf_get_image_offset_B_tile_sa(const struct isl_surf * surf,uint32_t level,uint32_t logical_array_layer,uint32_t logical_z_offset_px,uint32_t * offset_B,uint32_t * x_offset_sa,uint32_t * y_offset_sa)2570 isl_surf_get_image_offset_B_tile_sa(const struct isl_surf *surf,
2571                                     uint32_t level,
2572                                     uint32_t logical_array_layer,
2573                                     uint32_t logical_z_offset_px,
2574                                     uint32_t *offset_B,
2575                                     uint32_t *x_offset_sa,
2576                                     uint32_t *y_offset_sa)
2577 {
2578    const struct isl_format_layout *fmtl = isl_format_get_layout(surf->format);
2579 
2580    uint32_t total_x_offset_el, total_y_offset_el;
2581    isl_surf_get_image_offset_el(surf, level, logical_array_layer,
2582                                 logical_z_offset_px,
2583                                 &total_x_offset_el,
2584                                 &total_y_offset_el);
2585 
2586    uint32_t x_offset_el, y_offset_el;
2587    isl_tiling_get_intratile_offset_el(surf->tiling, fmtl->bpb,
2588                                       surf->row_pitch_B,
2589                                       total_x_offset_el,
2590                                       total_y_offset_el,
2591                                       offset_B,
2592                                       &x_offset_el,
2593                                       &y_offset_el);
2594 
2595    if (x_offset_sa) {
2596       *x_offset_sa = x_offset_el * fmtl->bw;
2597    } else {
2598       assert(x_offset_el == 0);
2599    }
2600 
2601    if (y_offset_sa) {
2602       *y_offset_sa = y_offset_el * fmtl->bh;
2603    } else {
2604       assert(y_offset_el == 0);
2605    }
2606 }
2607 
2608 void
isl_surf_get_image_range_B_tile(const struct isl_surf * surf,uint32_t level,uint32_t logical_array_layer,uint32_t logical_z_offset_px,uint32_t * start_tile_B,uint32_t * end_tile_B)2609 isl_surf_get_image_range_B_tile(const struct isl_surf *surf,
2610                                 uint32_t level,
2611                                 uint32_t logical_array_layer,
2612                                 uint32_t logical_z_offset_px,
2613                                 uint32_t *start_tile_B,
2614                                 uint32_t *end_tile_B)
2615 {
2616    uint32_t start_x_offset_el, start_y_offset_el;
2617    isl_surf_get_image_offset_el(surf, level, logical_array_layer,
2618                                 logical_z_offset_px,
2619                                 &start_x_offset_el,
2620                                 &start_y_offset_el);
2621 
2622    /* Compute the size of the subimage in surface elements */
2623    const uint32_t subimage_w_sa = isl_minify(surf->phys_level0_sa.w, level);
2624    const uint32_t subimage_h_sa = isl_minify(surf->phys_level0_sa.h, level);
2625    const struct isl_format_layout *fmtl = isl_format_get_layout(surf->format);
2626    const uint32_t subimage_w_el = isl_align_div_npot(subimage_w_sa, fmtl->bw);
2627    const uint32_t subimage_h_el = isl_align_div_npot(subimage_h_sa, fmtl->bh);
2628 
2629    /* Find the last pixel */
2630    uint32_t end_x_offset_el = start_x_offset_el + subimage_w_el - 1;
2631    uint32_t end_y_offset_el = start_y_offset_el + subimage_h_el - 1;
2632 
2633    UNUSED uint32_t x_offset_el, y_offset_el;
2634    isl_tiling_get_intratile_offset_el(surf->tiling, fmtl->bpb,
2635                                       surf->row_pitch_B,
2636                                       start_x_offset_el,
2637                                       start_y_offset_el,
2638                                       start_tile_B,
2639                                       &x_offset_el,
2640                                       &y_offset_el);
2641 
2642    isl_tiling_get_intratile_offset_el(surf->tiling, fmtl->bpb,
2643                                       surf->row_pitch_B,
2644                                       end_x_offset_el,
2645                                       end_y_offset_el,
2646                                       end_tile_B,
2647                                       &x_offset_el,
2648                                       &y_offset_el);
2649 
2650    /* We want the range we return to be exclusive but the tile containing the
2651     * last pixel (what we just calculated) is inclusive.  Add one.
2652     */
2653    (*end_tile_B)++;
2654 
2655    assert(*end_tile_B <= surf->size_B);
2656 }
2657 
2658 void
isl_surf_get_image_surf(const struct isl_device * dev,const struct isl_surf * surf,uint32_t level,uint32_t logical_array_layer,uint32_t logical_z_offset_px,struct isl_surf * image_surf,uint32_t * offset_B,uint32_t * x_offset_sa,uint32_t * y_offset_sa)2659 isl_surf_get_image_surf(const struct isl_device *dev,
2660                         const struct isl_surf *surf,
2661                         uint32_t level,
2662                         uint32_t logical_array_layer,
2663                         uint32_t logical_z_offset_px,
2664                         struct isl_surf *image_surf,
2665                         uint32_t *offset_B,
2666                         uint32_t *x_offset_sa,
2667                         uint32_t *y_offset_sa)
2668 {
2669    isl_surf_get_image_offset_B_tile_sa(surf,
2670                                        level,
2671                                        logical_array_layer,
2672                                        logical_z_offset_px,
2673                                        offset_B,
2674                                        x_offset_sa,
2675                                        y_offset_sa);
2676 
2677    /* Even for cube maps there will be only single face, therefore drop the
2678     * corresponding flag if present.
2679     */
2680    const isl_surf_usage_flags_t usage =
2681       surf->usage & (~ISL_SURF_USAGE_CUBE_BIT);
2682 
2683    bool ok UNUSED;
2684    ok = isl_surf_init(dev, image_surf,
2685                       .dim = ISL_SURF_DIM_2D,
2686                       .format = surf->format,
2687                       .width = isl_minify(surf->logical_level0_px.w, level),
2688                       .height = isl_minify(surf->logical_level0_px.h, level),
2689                       .depth = 1,
2690                       .levels = 1,
2691                       .array_len = 1,
2692                       .samples = surf->samples,
2693                       .row_pitch_B = surf->row_pitch_B,
2694                       .usage = usage,
2695                       .tiling_flags = (1 << surf->tiling));
2696    assert(ok);
2697 }
2698 
2699 void
isl_tiling_get_intratile_offset_el(enum isl_tiling tiling,uint32_t bpb,uint32_t row_pitch_B,uint32_t total_x_offset_el,uint32_t total_y_offset_el,uint32_t * base_address_offset,uint32_t * x_offset_el,uint32_t * y_offset_el)2700 isl_tiling_get_intratile_offset_el(enum isl_tiling tiling,
2701                                    uint32_t bpb,
2702                                    uint32_t row_pitch_B,
2703                                    uint32_t total_x_offset_el,
2704                                    uint32_t total_y_offset_el,
2705                                    uint32_t *base_address_offset,
2706                                    uint32_t *x_offset_el,
2707                                    uint32_t *y_offset_el)
2708 {
2709    if (tiling == ISL_TILING_LINEAR) {
2710       assert(bpb % 8 == 0);
2711       *base_address_offset = total_y_offset_el * row_pitch_B +
2712                              total_x_offset_el * (bpb / 8);
2713       *x_offset_el = 0;
2714       *y_offset_el = 0;
2715       return;
2716    }
2717 
2718    struct isl_tile_info tile_info;
2719    isl_tiling_get_info(tiling, bpb, &tile_info);
2720 
2721    assert(row_pitch_B % tile_info.phys_extent_B.width == 0);
2722 
2723    /* For non-power-of-two formats, we need the address to be both tile and
2724     * element-aligned.  The easiest way to achieve this is to work with a tile
2725     * that is three times as wide as the regular tile.
2726     *
2727     * The tile info returned by get_tile_info has a logical size that is an
2728     * integer number of tile_info.format_bpb size elements.  To scale the
2729     * tile, we scale up the physical width and then treat the logical tile
2730     * size as if it has bpb size elements.
2731     */
2732    const uint32_t tile_el_scale = bpb / tile_info.format_bpb;
2733    tile_info.phys_extent_B.width *= tile_el_scale;
2734 
2735    /* Compute the offset into the tile */
2736    *x_offset_el = total_x_offset_el % tile_info.logical_extent_el.w;
2737    *y_offset_el = total_y_offset_el % tile_info.logical_extent_el.h;
2738 
2739    /* Compute the offset of the tile in units of whole tiles */
2740    uint32_t x_offset_tl = total_x_offset_el / tile_info.logical_extent_el.w;
2741    uint32_t y_offset_tl = total_y_offset_el / tile_info.logical_extent_el.h;
2742 
2743    *base_address_offset =
2744       y_offset_tl * tile_info.phys_extent_B.h * row_pitch_B +
2745       x_offset_tl * tile_info.phys_extent_B.h * tile_info.phys_extent_B.w;
2746 }
2747 
2748 uint32_t
isl_surf_get_depth_format(const struct isl_device * dev,const struct isl_surf * surf)2749 isl_surf_get_depth_format(const struct isl_device *dev,
2750                           const struct isl_surf *surf)
2751 {
2752    /* Support for separate stencil buffers began in gen5. Support for
2753     * interleaved depthstencil buffers ceased in gen7. The intermediate gens,
2754     * those that supported separate and interleaved stencil, were gen5 and
2755     * gen6.
2756     *
2757     * For a list of all available formats, see the Sandybridge PRM >> Volume
2758     * 2 Part 1: 3D/Media - 3D Pipeline >> 3DSTATE_DEPTH_BUFFER >> Surface
2759     * Format (p321).
2760     */
2761 
2762    bool has_stencil = surf->usage & ISL_SURF_USAGE_STENCIL_BIT;
2763 
2764    assert(surf->usage & ISL_SURF_USAGE_DEPTH_BIT);
2765 
2766    if (has_stencil)
2767       assert(ISL_DEV_GEN(dev) < 7);
2768 
2769    switch (surf->format) {
2770    default:
2771       unreachable("bad isl depth format");
2772    case ISL_FORMAT_R32_FLOAT_X8X24_TYPELESS:
2773       assert(ISL_DEV_GEN(dev) < 7);
2774       return 0; /* D32_FLOAT_S8X24_UINT */
2775    case ISL_FORMAT_R32_FLOAT:
2776       assert(!has_stencil);
2777       return 1; /* D32_FLOAT */
2778    case ISL_FORMAT_R24_UNORM_X8_TYPELESS:
2779       if (has_stencil) {
2780          assert(ISL_DEV_GEN(dev) < 7);
2781          return 2; /* D24_UNORM_S8_UINT */
2782       } else {
2783          assert(ISL_DEV_GEN(dev) >= 5);
2784          return 3; /* D24_UNORM_X8_UINT */
2785       }
2786    case ISL_FORMAT_R16_UNORM:
2787       assert(!has_stencil);
2788       return 5; /* D16_UNORM */
2789    }
2790 }
2791 
2792 bool
isl_swizzle_supports_rendering(const struct gen_device_info * devinfo,struct isl_swizzle swizzle)2793 isl_swizzle_supports_rendering(const struct gen_device_info *devinfo,
2794                                struct isl_swizzle swizzle)
2795 {
2796    if (devinfo->is_haswell) {
2797       /* From the Haswell PRM,
2798        * RENDER_SURFACE_STATE::Shader Channel Select Red
2799        *
2800        *    "The Shader channel selects also define which shader channels are
2801        *    written to which surface channel. If the Shader channel select is
2802        *    SCS_ZERO or SCS_ONE then it is not written to the surface. If the
2803        *    shader channel select is SCS_RED it is written to the surface red
2804        *    channel and so on. If more than one shader channel select is set
2805        *    to the same surface channel only the first shader channel in RGBA
2806        *    order will be written."
2807        */
2808       return true;
2809    } else if (devinfo->gen <= 7) {
2810       /* Ivy Bridge and early doesn't have any swizzling */
2811       return isl_swizzle_is_identity(swizzle);
2812    } else {
2813       /* From the Sky Lake PRM Vol. 2d,
2814        * RENDER_SURFACE_STATE::Shader Channel Select Red
2815        *
2816        *    "For Render Target, Red, Green and Blue Shader Channel Selects
2817        *    MUST be such that only valid components can be swapped i.e. only
2818        *    change the order of components in the pixel. Any other values for
2819        *    these Shader Channel Select fields are not valid for Render
2820        *    Targets. This also means that there MUST not be multiple shader
2821        *    channels mapped to the same RT channel."
2822        *
2823        * From the Sky Lake PRM Vol. 2d,
2824        * RENDER_SURFACE_STATE::Shader Channel Select Alpha
2825        *
2826        *    "For Render Target, this field MUST be programmed to
2827        *    value = SCS_ALPHA."
2828        */
2829       return (swizzle.r == ISL_CHANNEL_SELECT_RED ||
2830               swizzle.r == ISL_CHANNEL_SELECT_GREEN ||
2831               swizzle.r == ISL_CHANNEL_SELECT_BLUE) &&
2832              (swizzle.g == ISL_CHANNEL_SELECT_RED ||
2833               swizzle.g == ISL_CHANNEL_SELECT_GREEN ||
2834               swizzle.g == ISL_CHANNEL_SELECT_BLUE) &&
2835              (swizzle.b == ISL_CHANNEL_SELECT_RED ||
2836               swizzle.b == ISL_CHANNEL_SELECT_GREEN ||
2837               swizzle.b == ISL_CHANNEL_SELECT_BLUE) &&
2838              swizzle.r != swizzle.g &&
2839              swizzle.r != swizzle.b &&
2840              swizzle.g != swizzle.b &&
2841              swizzle.a == ISL_CHANNEL_SELECT_ALPHA;
2842    }
2843 }
2844 
2845 static enum isl_channel_select
swizzle_select(enum isl_channel_select chan,struct isl_swizzle swizzle)2846 swizzle_select(enum isl_channel_select chan, struct isl_swizzle swizzle)
2847 {
2848    switch (chan) {
2849    case ISL_CHANNEL_SELECT_ZERO:
2850    case ISL_CHANNEL_SELECT_ONE:
2851       return chan;
2852    case ISL_CHANNEL_SELECT_RED:
2853       return swizzle.r;
2854    case ISL_CHANNEL_SELECT_GREEN:
2855       return swizzle.g;
2856    case ISL_CHANNEL_SELECT_BLUE:
2857       return swizzle.b;
2858    case ISL_CHANNEL_SELECT_ALPHA:
2859       return swizzle.a;
2860    default:
2861       unreachable("Invalid swizzle component");
2862    }
2863 }
2864 
2865 /**
2866  * Returns the single swizzle that is equivalent to applying the two given
2867  * swizzles in sequence.
2868  */
2869 struct isl_swizzle
isl_swizzle_compose(struct isl_swizzle first,struct isl_swizzle second)2870 isl_swizzle_compose(struct isl_swizzle first, struct isl_swizzle second)
2871 {
2872    return (struct isl_swizzle) {
2873       .r = swizzle_select(first.r, second),
2874       .g = swizzle_select(first.g, second),
2875       .b = swizzle_select(first.b, second),
2876       .a = swizzle_select(first.a, second),
2877    };
2878 }
2879 
2880 /**
2881  * Returns a swizzle that is the pseudo-inverse of this swizzle.
2882  */
2883 struct isl_swizzle
isl_swizzle_invert(struct isl_swizzle swizzle)2884 isl_swizzle_invert(struct isl_swizzle swizzle)
2885 {
2886    /* Default to zero for channels which do not show up in the swizzle */
2887    enum isl_channel_select chans[4] = {
2888       ISL_CHANNEL_SELECT_ZERO,
2889       ISL_CHANNEL_SELECT_ZERO,
2890       ISL_CHANNEL_SELECT_ZERO,
2891       ISL_CHANNEL_SELECT_ZERO,
2892    };
2893 
2894    /* We go in ABGR order so that, if there are any duplicates, the first one
2895     * is taken if you look at it in RGBA order.  This is what Haswell hardware
2896     * does for render target swizzles.
2897     */
2898    if ((unsigned)(swizzle.a - ISL_CHANNEL_SELECT_RED) < 4)
2899       chans[swizzle.a - ISL_CHANNEL_SELECT_RED] = ISL_CHANNEL_SELECT_ALPHA;
2900    if ((unsigned)(swizzle.b - ISL_CHANNEL_SELECT_RED) < 4)
2901       chans[swizzle.b - ISL_CHANNEL_SELECT_RED] = ISL_CHANNEL_SELECT_BLUE;
2902    if ((unsigned)(swizzle.g - ISL_CHANNEL_SELECT_RED) < 4)
2903       chans[swizzle.g - ISL_CHANNEL_SELECT_RED] = ISL_CHANNEL_SELECT_GREEN;
2904    if ((unsigned)(swizzle.r - ISL_CHANNEL_SELECT_RED) < 4)
2905       chans[swizzle.r - ISL_CHANNEL_SELECT_RED] = ISL_CHANNEL_SELECT_RED;
2906 
2907    return (struct isl_swizzle) { chans[0], chans[1], chans[2], chans[3] };
2908 }
2909 
2910 /** Applies an inverse swizzle to a color value */
2911 union isl_color_value
isl_color_value_swizzle_inv(union isl_color_value src,struct isl_swizzle swizzle)2912 isl_color_value_swizzle_inv(union isl_color_value src,
2913                             struct isl_swizzle swizzle)
2914 {
2915    union isl_color_value dst = { .u32 = { 0, } };
2916 
2917    /* We assign colors in ABGR order so that the first one will be taken in
2918     * RGBA precedence order.  According to the PRM docs for shader channel
2919     * select, this matches Haswell hardware behavior.
2920     */
2921    if ((unsigned)(swizzle.a - ISL_CHANNEL_SELECT_RED) < 4)
2922       dst.u32[swizzle.a - ISL_CHANNEL_SELECT_RED] = src.u32[3];
2923    if ((unsigned)(swizzle.b - ISL_CHANNEL_SELECT_RED) < 4)
2924       dst.u32[swizzle.b - ISL_CHANNEL_SELECT_RED] = src.u32[2];
2925    if ((unsigned)(swizzle.g - ISL_CHANNEL_SELECT_RED) < 4)
2926       dst.u32[swizzle.g - ISL_CHANNEL_SELECT_RED] = src.u32[1];
2927    if ((unsigned)(swizzle.r - ISL_CHANNEL_SELECT_RED) < 4)
2928       dst.u32[swizzle.r - ISL_CHANNEL_SELECT_RED] = src.u32[0];
2929 
2930    return dst;
2931 }
2932 
2933 uint8_t
isl_format_get_aux_map_encoding(enum isl_format format)2934 isl_format_get_aux_map_encoding(enum isl_format format)
2935 {
2936    switch(format) {
2937    case ISL_FORMAT_R32G32B32A32_FLOAT: return 0x11;
2938    case ISL_FORMAT_R32G32B32X32_FLOAT: return 0x11;
2939    case ISL_FORMAT_R32G32B32A32_SINT: return 0x12;
2940    case ISL_FORMAT_R32G32B32A32_UINT: return 0x13;
2941    case ISL_FORMAT_R16G16B16A16_UNORM: return 0x14;
2942    case ISL_FORMAT_R16G16B16A16_SNORM: return 0x15;
2943    case ISL_FORMAT_R16G16B16A16_SINT: return 0x16;
2944    case ISL_FORMAT_R16G16B16A16_UINT: return 0x17;
2945    case ISL_FORMAT_R16G16B16A16_FLOAT: return 0x10;
2946    case ISL_FORMAT_R16G16B16X16_FLOAT: return 0x10;
2947    case ISL_FORMAT_R32G32_FLOAT: return 0x11;
2948    case ISL_FORMAT_R32G32_SINT: return 0x12;
2949    case ISL_FORMAT_R32G32_UINT: return 0x13;
2950    case ISL_FORMAT_B8G8R8A8_UNORM: return 0xA;
2951    case ISL_FORMAT_B8G8R8X8_UNORM: return 0xA;
2952    case ISL_FORMAT_B8G8R8A8_UNORM_SRGB: return 0xA;
2953    case ISL_FORMAT_B8G8R8X8_UNORM_SRGB: return 0xA;
2954    case ISL_FORMAT_R10G10B10A2_UNORM: return 0x18;
2955    case ISL_FORMAT_R10G10B10A2_UNORM_SRGB: return 0x18;
2956    case ISL_FORMAT_R10G10B10_FLOAT_A2_UNORM: return 0x19;
2957    case ISL_FORMAT_R10G10B10A2_UINT: return 0x1A;
2958    case ISL_FORMAT_R8G8B8A8_UNORM: return 0xA;
2959    case ISL_FORMAT_R8G8B8A8_UNORM_SRGB: return 0xA;
2960    case ISL_FORMAT_R8G8B8A8_SNORM: return 0x1B;
2961    case ISL_FORMAT_R8G8B8A8_SINT: return 0x1C;
2962    case ISL_FORMAT_R8G8B8A8_UINT: return 0x1D;
2963    case ISL_FORMAT_R16G16_UNORM: return 0x14;
2964    case ISL_FORMAT_R16G16_SNORM: return 0x15;
2965    case ISL_FORMAT_R16G16_SINT: return 0x16;
2966    case ISL_FORMAT_R16G16_UINT: return 0x17;
2967    case ISL_FORMAT_R16G16_FLOAT: return 0x10;
2968    case ISL_FORMAT_B10G10R10A2_UNORM: return 0x18;
2969    case ISL_FORMAT_B10G10R10A2_UNORM_SRGB: return 0x18;
2970    case ISL_FORMAT_R11G11B10_FLOAT: return 0x1E;
2971    case ISL_FORMAT_R32_SINT: return 0x12;
2972    case ISL_FORMAT_R32_UINT: return 0x13;
2973    case ISL_FORMAT_R32_FLOAT: return 0x11;
2974    case ISL_FORMAT_R24_UNORM_X8_TYPELESS: return 0x13;
2975    case ISL_FORMAT_B5G6R5_UNORM: return 0xA;
2976    case ISL_FORMAT_B5G6R5_UNORM_SRGB: return 0xA;
2977    case ISL_FORMAT_B5G5R5A1_UNORM: return 0xA;
2978    case ISL_FORMAT_B5G5R5A1_UNORM_SRGB: return 0xA;
2979    case ISL_FORMAT_B4G4R4A4_UNORM: return 0xA;
2980    case ISL_FORMAT_B4G4R4A4_UNORM_SRGB: return 0xA;
2981    case ISL_FORMAT_R8G8_UNORM: return 0xA;
2982    case ISL_FORMAT_R8G8_SNORM: return 0x1B;
2983    case ISL_FORMAT_R8G8_SINT: return 0x1C;
2984    case ISL_FORMAT_R8G8_UINT: return 0x1D;
2985    case ISL_FORMAT_R16_UNORM: return 0x14;
2986    case ISL_FORMAT_R16_SNORM: return 0x15;
2987    case ISL_FORMAT_R16_SINT: return 0x16;
2988    case ISL_FORMAT_R16_UINT: return 0x17;
2989    case ISL_FORMAT_R16_FLOAT: return 0x10;
2990    case ISL_FORMAT_B5G5R5X1_UNORM: return 0xA;
2991    case ISL_FORMAT_B5G5R5X1_UNORM_SRGB: return 0xA;
2992    case ISL_FORMAT_A1B5G5R5_UNORM: return 0xA;
2993    case ISL_FORMAT_A4B4G4R4_UNORM: return 0xA;
2994    case ISL_FORMAT_R8_UNORM: return 0xA;
2995    case ISL_FORMAT_R8_SNORM: return 0x1B;
2996    case ISL_FORMAT_R8_SINT: return 0x1C;
2997    case ISL_FORMAT_R8_UINT: return 0x1D;
2998    case ISL_FORMAT_A8_UNORM: return 0xA;
2999    case ISL_FORMAT_PLANAR_420_8: return 0xF;
3000    case ISL_FORMAT_PLANAR_420_10: return 0x7;
3001    case ISL_FORMAT_PLANAR_420_12: return 0x8;
3002    case ISL_FORMAT_PLANAR_420_16: return 0x8;
3003    case ISL_FORMAT_YCRCB_NORMAL: return 0x3;
3004    case ISL_FORMAT_YCRCB_SWAPY: return 0xB;
3005    default:
3006       unreachable("Unsupported aux-map format!");
3007       return 0;
3008    }
3009 }
3010