1 /*
2  * Copyright 2015 Intel Corporation
3  *
4  *  Permission is hereby granted, free of charge, to any person obtaining a
5  *  copy of this software and associated documentation files (the "Software"),
6  *  to deal in the Software without restriction, including without limitation
7  *  the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  *  and/or sell copies of the Software, and to permit persons to whom the
9  *  Software is furnished to do so, subject to the following conditions:
10  *
11  *  The above copyright notice and this permission notice (including the next
12  *  paragraph) shall be included in all copies or substantial portions of the
13  *  Software.
14  *
15  *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  *  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  *  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  *  THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  *  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  *  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  *  IN THE SOFTWARE.
22  */
23 
24 #include <assert.h>
25 #include <stdarg.h>
26 #include <stdio.h>
27 
28 #include "genxml/genX_bits.h"
29 
30 #include "isl.h"
31 #include "isl_gen4.h"
32 #include "isl_gen6.h"
33 #include "isl_gen7.h"
34 #include "isl_gen8.h"
35 #include "isl_gen9.h"
36 #include "isl_priv.h"
37 
38 void PRINTFLIKE(3, 4) UNUSED
__isl_finishme(const char * file,int line,const char * fmt,...)39 __isl_finishme(const char *file, int line, const char *fmt, ...)
40 {
41    va_list ap;
42    char buf[512];
43 
44    va_start(ap, fmt);
45    vsnprintf(buf, sizeof(buf), fmt, ap);
46    va_end(ap);
47 
48    fprintf(stderr, "%s:%d: FINISHME: %s\n", file, line, buf);
49 }
50 
51 void
isl_device_init(struct isl_device * dev,const struct gen_device_info * info,bool has_bit6_swizzling)52 isl_device_init(struct isl_device *dev,
53                 const struct gen_device_info *info,
54                 bool has_bit6_swizzling)
55 {
56    dev->info = info;
57    dev->use_separate_stencil = ISL_DEV_GEN(dev) >= 6;
58    dev->has_bit6_swizzling = has_bit6_swizzling;
59 
60    /* The ISL_DEV macros may be defined in the CFLAGS, thus hardcoding some
61     * device properties at buildtime. Verify that the macros with the device
62     * properties chosen during runtime.
63     */
64    ISL_DEV_GEN_SANITIZE(dev);
65    ISL_DEV_USE_SEPARATE_STENCIL_SANITIZE(dev);
66 
67    /* Did we break hiz or stencil? */
68    if (ISL_DEV_USE_SEPARATE_STENCIL(dev))
69       assert(info->has_hiz_and_separate_stencil);
70    if (info->must_use_separate_stencil)
71       assert(ISL_DEV_USE_SEPARATE_STENCIL(dev));
72 
73    dev->ss.size = RENDER_SURFACE_STATE_length(info) * 4;
74    dev->ss.align = isl_align(dev->ss.size, 32);
75 
76    dev->ss.clear_value_size =
77       isl_align(RENDER_SURFACE_STATE_RedClearColor_bits(info) +
78                 RENDER_SURFACE_STATE_GreenClearColor_bits(info) +
79                 RENDER_SURFACE_STATE_BlueClearColor_bits(info) +
80                 RENDER_SURFACE_STATE_AlphaClearColor_bits(info), 32) / 8;
81 
82    dev->ss.clear_value_offset =
83       RENDER_SURFACE_STATE_RedClearColor_start(info) / 32 * 4;
84 
85    assert(RENDER_SURFACE_STATE_SurfaceBaseAddress_start(info) % 8 == 0);
86    dev->ss.addr_offset =
87       RENDER_SURFACE_STATE_SurfaceBaseAddress_start(info) / 8;
88 
89    /* The "Auxiliary Surface Base Address" field starts a bit higher up
90     * because the bottom 12 bits are used for other things.  Round down to
91     * the nearest dword before.
92     */
93    dev->ss.aux_addr_offset =
94       (RENDER_SURFACE_STATE_AuxiliarySurfaceBaseAddress_start(info) & ~31) / 8;
95 
96    dev->ds.size = _3DSTATE_DEPTH_BUFFER_length(info) * 4;
97    assert(_3DSTATE_DEPTH_BUFFER_SurfaceBaseAddress_start(info) % 8 == 0);
98    dev->ds.depth_offset =
99       _3DSTATE_DEPTH_BUFFER_SurfaceBaseAddress_start(info) / 8;
100 
101    if (dev->use_separate_stencil) {
102       dev->ds.size += _3DSTATE_STENCIL_BUFFER_length(info) * 4 +
103                       _3DSTATE_HIER_DEPTH_BUFFER_length(info) * 4 +
104                       _3DSTATE_CLEAR_PARAMS_length(info) * 4;
105 
106       assert(_3DSTATE_STENCIL_BUFFER_SurfaceBaseAddress_start(info) % 8 == 0);
107       dev->ds.stencil_offset =
108          _3DSTATE_DEPTH_BUFFER_length(info) * 4 +
109          _3DSTATE_STENCIL_BUFFER_SurfaceBaseAddress_start(info) / 8;
110 
111       assert(_3DSTATE_HIER_DEPTH_BUFFER_SurfaceBaseAddress_start(info) % 8 == 0);
112       dev->ds.hiz_offset =
113          _3DSTATE_DEPTH_BUFFER_length(info) * 4 +
114          _3DSTATE_STENCIL_BUFFER_length(info) * 4 +
115          _3DSTATE_HIER_DEPTH_BUFFER_SurfaceBaseAddress_start(info) / 8;
116    } else {
117       dev->ds.stencil_offset = 0;
118       dev->ds.hiz_offset = 0;
119    }
120 }
121 
122 /**
123  * @brief Query the set of multisamples supported by the device.
124  *
125  * This function always returns non-zero, as ISL_SAMPLE_COUNT_1_BIT is always
126  * supported.
127  */
128 isl_sample_count_mask_t ATTRIBUTE_CONST
isl_device_get_sample_counts(struct isl_device * dev)129 isl_device_get_sample_counts(struct isl_device *dev)
130 {
131    if (ISL_DEV_GEN(dev) >= 9) {
132       return ISL_SAMPLE_COUNT_1_BIT |
133              ISL_SAMPLE_COUNT_2_BIT |
134              ISL_SAMPLE_COUNT_4_BIT |
135              ISL_SAMPLE_COUNT_8_BIT |
136              ISL_SAMPLE_COUNT_16_BIT;
137    } else if (ISL_DEV_GEN(dev) >= 8) {
138       return ISL_SAMPLE_COUNT_1_BIT |
139              ISL_SAMPLE_COUNT_2_BIT |
140              ISL_SAMPLE_COUNT_4_BIT |
141              ISL_SAMPLE_COUNT_8_BIT;
142    } else if (ISL_DEV_GEN(dev) >= 7) {
143       return ISL_SAMPLE_COUNT_1_BIT |
144              ISL_SAMPLE_COUNT_4_BIT |
145              ISL_SAMPLE_COUNT_8_BIT;
146    } else if (ISL_DEV_GEN(dev) >= 6) {
147       return ISL_SAMPLE_COUNT_1_BIT |
148              ISL_SAMPLE_COUNT_4_BIT;
149    } else {
150       return ISL_SAMPLE_COUNT_1_BIT;
151    }
152 }
153 
154 /**
155  * @param[out] info is written only on success
156  */
157 static void
isl_tiling_get_info(enum isl_tiling tiling,uint32_t format_bpb,struct isl_tile_info * tile_info)158 isl_tiling_get_info(enum isl_tiling tiling,
159                     uint32_t format_bpb,
160                     struct isl_tile_info *tile_info)
161 {
162    const uint32_t bs = format_bpb / 8;
163    struct isl_extent2d logical_el, phys_B;
164 
165    if (tiling != ISL_TILING_LINEAR && !isl_is_pow2(format_bpb)) {
166       /* It is possible to have non-power-of-two formats in a tiled buffer.
167        * The easiest way to handle this is to treat the tile as if it is three
168        * times as wide.  This way no pixel will ever cross a tile boundary.
169        * This really only works on legacy X and Y tiling formats.
170        */
171       assert(tiling == ISL_TILING_X || tiling == ISL_TILING_Y0);
172       assert(bs % 3 == 0 && isl_is_pow2(format_bpb / 3));
173       isl_tiling_get_info(tiling, format_bpb / 3, tile_info);
174       return;
175    }
176 
177    switch (tiling) {
178    case ISL_TILING_LINEAR:
179       assert(bs > 0);
180       logical_el = isl_extent2d(1, 1);
181       phys_B = isl_extent2d(bs, 1);
182       break;
183 
184    case ISL_TILING_X:
185       assert(bs > 0);
186       logical_el = isl_extent2d(512 / bs, 8);
187       phys_B = isl_extent2d(512, 8);
188       break;
189 
190    case ISL_TILING_Y0:
191       assert(bs > 0);
192       logical_el = isl_extent2d(128 / bs, 32);
193       phys_B = isl_extent2d(128, 32);
194       break;
195 
196    case ISL_TILING_W:
197       assert(bs == 1);
198       logical_el = isl_extent2d(64, 64);
199       /* From the Broadwell PRM Vol 2d, RENDER_SURFACE_STATE::SurfacePitch:
200        *
201        *    "If the surface is a stencil buffer (and thus has Tile Mode set
202        *    to TILEMODE_WMAJOR), the pitch must be set to 2x the value
203        *    computed based on width, as the stencil buffer is stored with two
204        *    rows interleaved."
205        *
206        * This, together with the fact that stencil buffers are referred to as
207        * being Y-tiled in the PRMs for older hardware implies that the
208        * physical size of a W-tile is actually the same as for a Y-tile.
209        */
210       phys_B = isl_extent2d(128, 32);
211       break;
212 
213    case ISL_TILING_Yf:
214    case ISL_TILING_Ys: {
215       bool is_Ys = tiling == ISL_TILING_Ys;
216 
217       assert(bs > 0);
218       unsigned width = 1 << (6 + (ffs(bs) / 2) + (2 * is_Ys));
219       unsigned height = 1 << (6 - (ffs(bs) / 2) + (2 * is_Ys));
220 
221       logical_el = isl_extent2d(width / bs, height);
222       phys_B = isl_extent2d(width, height);
223       break;
224    }
225 
226    case ISL_TILING_HIZ:
227       /* HiZ buffers are required to have ISL_FORMAT_HIZ which is an 8x4
228        * 128bpb format.  The tiling has the same physical dimensions as
229        * Y-tiling but actually has two HiZ columns per Y-tiled column.
230        */
231       assert(bs == 16);
232       logical_el = isl_extent2d(16, 16);
233       phys_B = isl_extent2d(128, 32);
234       break;
235 
236    case ISL_TILING_CCS:
237       /* CCS surfaces are required to have one of the GENX_CCS_* formats which
238        * have a block size of 1 or 2 bits per block and each CCS element
239        * corresponds to one cache-line pair in the main surface.  From the Sky
240        * Lake PRM Vol. 12 in the section on planes:
241        *
242        *    "The Color Control Surface (CCS) contains the compression status
243        *    of the cache-line pairs. The compression state of the cache-line
244        *    pair is specified by 2 bits in the CCS.  Each CCS cache-line
245        *    represents an area on the main surface of 16x16 sets of 128 byte
246        *    Y-tiled cache-line-pairs. CCS is always Y tiled."
247        *
248        * The CCS being Y-tiled implies that it's an 8x8 grid of cache-lines.
249        * Since each cache line corresponds to a 16x16 set of cache-line pairs,
250        * that yields total tile area of 128x128 cache-line pairs or CCS
251        * elements.  On older hardware, each CCS element is 1 bit and the tile
252        * is 128x256 elements.
253        */
254       assert(format_bpb == 1 || format_bpb == 2);
255       logical_el = isl_extent2d(128, 256 / format_bpb);
256       phys_B = isl_extent2d(128, 32);
257       break;
258 
259    default:
260       unreachable("not reached");
261    } /* end switch */
262 
263    *tile_info = (struct isl_tile_info) {
264       .tiling = tiling,
265       .format_bpb = format_bpb,
266       .logical_extent_el = logical_el,
267       .phys_extent_B = phys_B,
268    };
269 }
270 
271 bool
isl_color_value_is_zero(union isl_color_value value,enum isl_format format)272 isl_color_value_is_zero(union isl_color_value value,
273                         enum isl_format format)
274 {
275    const struct isl_format_layout *fmtl = isl_format_get_layout(format);
276 
277 #define RETURN_FALSE_IF_NOT_0(c, i) \
278    if (fmtl->channels.c.bits && value.u32[i] != 0) \
279       return false
280 
281    RETURN_FALSE_IF_NOT_0(r, 0);
282    RETURN_FALSE_IF_NOT_0(g, 1);
283    RETURN_FALSE_IF_NOT_0(b, 2);
284    RETURN_FALSE_IF_NOT_0(a, 3);
285 
286 #undef RETURN_FALSE_IF_NOT_0
287 
288    return true;
289 }
290 
291 bool
isl_color_value_is_zero_one(union isl_color_value value,enum isl_format format)292 isl_color_value_is_zero_one(union isl_color_value value,
293                             enum isl_format format)
294 {
295    const struct isl_format_layout *fmtl = isl_format_get_layout(format);
296 
297 #define RETURN_FALSE_IF_NOT_0_1(c, i, field) \
298    if (fmtl->channels.c.bits && value.field[i] != 0 && value.field[i] != 1) \
299       return false
300 
301    if (isl_format_has_int_channel(format)) {
302       RETURN_FALSE_IF_NOT_0_1(r, 0, u32);
303       RETURN_FALSE_IF_NOT_0_1(g, 1, u32);
304       RETURN_FALSE_IF_NOT_0_1(b, 2, u32);
305       RETURN_FALSE_IF_NOT_0_1(a, 3, u32);
306    } else {
307       RETURN_FALSE_IF_NOT_0_1(r, 0, f32);
308       RETURN_FALSE_IF_NOT_0_1(g, 1, f32);
309       RETURN_FALSE_IF_NOT_0_1(b, 2, f32);
310       RETURN_FALSE_IF_NOT_0_1(a, 3, f32);
311    }
312 
313 #undef RETURN_FALSE_IF_NOT_0_1
314 
315    return true;
316 }
317 
318 /**
319  * @param[out] tiling is set only on success
320  */
321 static bool
isl_surf_choose_tiling(const struct isl_device * dev,const struct isl_surf_init_info * restrict info,enum isl_tiling * tiling)322 isl_surf_choose_tiling(const struct isl_device *dev,
323                        const struct isl_surf_init_info *restrict info,
324                        enum isl_tiling *tiling)
325 {
326    isl_tiling_flags_t tiling_flags = info->tiling_flags;
327 
328    /* HiZ surfaces always use the HiZ tiling */
329    if (info->usage & ISL_SURF_USAGE_HIZ_BIT) {
330       assert(info->format == ISL_FORMAT_HIZ);
331       assert(tiling_flags == ISL_TILING_HIZ_BIT);
332       *tiling = ISL_TILING_HIZ;
333       return true;
334    }
335 
336    /* CCS surfaces always use the CCS tiling */
337    if (info->usage & ISL_SURF_USAGE_CCS_BIT) {
338       assert(isl_format_get_layout(info->format)->txc == ISL_TXC_CCS);
339       assert(tiling_flags == ISL_TILING_CCS_BIT);
340       *tiling = ISL_TILING_CCS;
341       return true;
342    }
343 
344    if (ISL_DEV_GEN(dev) >= 6) {
345       isl_gen6_filter_tiling(dev, info, &tiling_flags);
346    } else {
347       isl_gen4_filter_tiling(dev, info, &tiling_flags);
348    }
349 
350    #define CHOOSE(__tiling) \
351       do { \
352          if (tiling_flags & (1u << (__tiling))) { \
353             *tiling = (__tiling); \
354             return true; \
355           } \
356       } while (0)
357 
358    /* Of the tiling modes remaining, choose the one that offers the best
359     * performance.
360     */
361 
362    if (info->dim == ISL_SURF_DIM_1D) {
363       /* Prefer linear for 1D surfaces because they do not benefit from
364        * tiling. To the contrary, tiling leads to wasted memory and poor
365        * memory locality due to the swizzling and alignment restrictions
366        * required in tiled surfaces.
367        */
368       CHOOSE(ISL_TILING_LINEAR);
369    }
370 
371    CHOOSE(ISL_TILING_Ys);
372    CHOOSE(ISL_TILING_Yf);
373    CHOOSE(ISL_TILING_Y0);
374    CHOOSE(ISL_TILING_X);
375    CHOOSE(ISL_TILING_W);
376    CHOOSE(ISL_TILING_LINEAR);
377 
378    #undef CHOOSE
379 
380    /* No tiling mode accomodates the inputs. */
381    return false;
382 }
383 
384 static bool
isl_choose_msaa_layout(const struct isl_device * dev,const struct isl_surf_init_info * info,enum isl_tiling tiling,enum isl_msaa_layout * msaa_layout)385 isl_choose_msaa_layout(const struct isl_device *dev,
386                  const struct isl_surf_init_info *info,
387                  enum isl_tiling tiling,
388                  enum isl_msaa_layout *msaa_layout)
389 {
390    if (ISL_DEV_GEN(dev) >= 8) {
391       return isl_gen8_choose_msaa_layout(dev, info, tiling, msaa_layout);
392    } else if (ISL_DEV_GEN(dev) >= 7) {
393       return isl_gen7_choose_msaa_layout(dev, info, tiling, msaa_layout);
394    } else if (ISL_DEV_GEN(dev) >= 6) {
395       return isl_gen6_choose_msaa_layout(dev, info, tiling, msaa_layout);
396    } else {
397       return isl_gen4_choose_msaa_layout(dev, info, tiling, msaa_layout);
398    }
399 }
400 
401 struct isl_extent2d
isl_get_interleaved_msaa_px_size_sa(uint32_t samples)402 isl_get_interleaved_msaa_px_size_sa(uint32_t samples)
403 {
404    assert(isl_is_pow2(samples));
405 
406    /* From the Broadwell PRM >> Volume 5: Memory Views >> Computing Mip Level
407     * Sizes (p133):
408     *
409     *    If the surface is multisampled and it is a depth or stencil surface
410     *    or Multisampled Surface StorageFormat in SURFACE_STATE is
411     *    MSFMT_DEPTH_STENCIL, W_L and H_L must be adjusted as follows before
412     *    proceeding: [...]
413     */
414    return (struct isl_extent2d) {
415       .width = 1 << ((ffs(samples) - 0) / 2),
416       .height = 1 << ((ffs(samples) - 1) / 2),
417    };
418 }
419 
420 static void
isl_msaa_interleaved_scale_px_to_sa(uint32_t samples,uint32_t * width,uint32_t * height)421 isl_msaa_interleaved_scale_px_to_sa(uint32_t samples,
422                                     uint32_t *width, uint32_t *height)
423 {
424    const struct isl_extent2d px_size_sa =
425       isl_get_interleaved_msaa_px_size_sa(samples);
426 
427    if (width)
428       *width = isl_align(*width, 2) * px_size_sa.width;
429    if (height)
430       *height = isl_align(*height, 2) * px_size_sa.height;
431 }
432 
433 static enum isl_array_pitch_span
isl_choose_array_pitch_span(const struct isl_device * dev,const struct isl_surf_init_info * restrict info,enum isl_dim_layout dim_layout,const struct isl_extent4d * phys_level0_sa)434 isl_choose_array_pitch_span(const struct isl_device *dev,
435                             const struct isl_surf_init_info *restrict info,
436                             enum isl_dim_layout dim_layout,
437                             const struct isl_extent4d *phys_level0_sa)
438 {
439    switch (dim_layout) {
440    case ISL_DIM_LAYOUT_GEN9_1D:
441    case ISL_DIM_LAYOUT_GEN4_2D:
442       if (ISL_DEV_GEN(dev) >= 8) {
443          /* QPitch becomes programmable in Broadwell. So choose the
444           * most compact QPitch possible in order to conserve memory.
445           *
446           * From the Broadwell PRM >> Volume 2d: Command Reference: Structures
447           * >> RENDER_SURFACE_STATE Surface QPitch (p325):
448           *
449           *    - Software must ensure that this field is set to a value
450           *      sufficiently large such that the array slices in the surface
451           *      do not overlap. Refer to the Memory Data Formats section for
452           *      information on how surfaces are stored in memory.
453           *
454           *    - This field specifies the distance in rows between array
455           *      slices.  It is used only in the following cases:
456           *
457           *          - Surface Array is enabled OR
458           *          - Number of Mulitsamples is not NUMSAMPLES_1 and
459           *            Multisampled Surface Storage Format set to MSFMT_MSS OR
460           *          - Surface Type is SURFTYPE_CUBE
461           */
462          return ISL_ARRAY_PITCH_SPAN_COMPACT;
463       } else if (ISL_DEV_GEN(dev) >= 7) {
464          /* Note that Ivybridge introduces
465           * RENDER_SURFACE_STATE.SurfaceArraySpacing, which provides the
466           * driver more control over the QPitch.
467           */
468 
469          if (phys_level0_sa->array_len == 1) {
470             /* The hardware will never use the QPitch. So choose the most
471              * compact QPitch possible in order to conserve memory.
472              */
473             return ISL_ARRAY_PITCH_SPAN_COMPACT;
474          }
475 
476          if (isl_surf_usage_is_depth_or_stencil(info->usage) ||
477              (info->usage & ISL_SURF_USAGE_HIZ_BIT)) {
478             /* From the Ivybridge PRM >> Volume 1 Part 1: Graphics Core >>
479              * Section 6.18.4.7: Surface Arrays (p112):
480              *
481              *    If Surface Array Spacing is set to ARYSPC_FULL (note that
482              *    the depth buffer and stencil buffer have an implied value of
483              *    ARYSPC_FULL):
484              */
485             return ISL_ARRAY_PITCH_SPAN_FULL;
486          }
487 
488          if (info->levels == 1) {
489             /* We are able to set RENDER_SURFACE_STATE.SurfaceArraySpacing
490              * to ARYSPC_LOD0.
491              */
492             return ISL_ARRAY_PITCH_SPAN_COMPACT;
493          }
494 
495          return ISL_ARRAY_PITCH_SPAN_FULL;
496       } else if ((ISL_DEV_GEN(dev) == 5 || ISL_DEV_GEN(dev) == 6) &&
497                  ISL_DEV_USE_SEPARATE_STENCIL(dev) &&
498                  isl_surf_usage_is_stencil(info->usage)) {
499          /* [ILK-SNB] Errata from the Sandy Bridge PRM >> Volume 4 Part 1:
500           * Graphics Core >> Section 7.18.3.7: Surface Arrays:
501           *
502           *    The separate stencil buffer does not support mip mapping, thus
503           *    the storage for LODs other than LOD 0 is not needed.
504           */
505          assert(info->levels == 1);
506          return ISL_ARRAY_PITCH_SPAN_COMPACT;
507       } else {
508          if ((ISL_DEV_GEN(dev) == 5 || ISL_DEV_GEN(dev) == 6) &&
509              ISL_DEV_USE_SEPARATE_STENCIL(dev) &&
510              isl_surf_usage_is_stencil(info->usage)) {
511             /* [ILK-SNB] Errata from the Sandy Bridge PRM >> Volume 4 Part 1:
512              * Graphics Core >> Section 7.18.3.7: Surface Arrays:
513              *
514              *    The separate stencil buffer does not support mip mapping,
515              *    thus the storage for LODs other than LOD 0 is not needed.
516              */
517             assert(info->levels == 1);
518             assert(phys_level0_sa->array_len == 1);
519             return ISL_ARRAY_PITCH_SPAN_COMPACT;
520          }
521 
522          if (phys_level0_sa->array_len == 1) {
523             /* The hardware will never use the QPitch. So choose the most
524              * compact QPitch possible in order to conserve memory.
525              */
526             return ISL_ARRAY_PITCH_SPAN_COMPACT;
527          }
528 
529          return ISL_ARRAY_PITCH_SPAN_FULL;
530       }
531 
532    case ISL_DIM_LAYOUT_GEN4_3D:
533       /* The hardware will never use the QPitch. So choose the most
534        * compact QPitch possible in order to conserve memory.
535        */
536       return ISL_ARRAY_PITCH_SPAN_COMPACT;
537 
538    case ISL_DIM_LAYOUT_GEN6_STENCIL_HIZ:
539       /* Each array image in the gen6 stencil of HiZ surface is compact in the
540        * sense that every LOD is a compact array of the same size as LOD0.
541        */
542       return ISL_ARRAY_PITCH_SPAN_COMPACT;
543    }
544 
545    unreachable("bad isl_dim_layout");
546    return ISL_ARRAY_PITCH_SPAN_FULL;
547 }
548 
549 static void
isl_choose_image_alignment_el(const struct isl_device * dev,const struct isl_surf_init_info * restrict info,enum isl_tiling tiling,enum isl_dim_layout dim_layout,enum isl_msaa_layout msaa_layout,struct isl_extent3d * image_align_el)550 isl_choose_image_alignment_el(const struct isl_device *dev,
551                               const struct isl_surf_init_info *restrict info,
552                               enum isl_tiling tiling,
553                               enum isl_dim_layout dim_layout,
554                               enum isl_msaa_layout msaa_layout,
555                               struct isl_extent3d *image_align_el)
556 {
557    const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
558    if (fmtl->txc == ISL_TXC_MCS) {
559       assert(tiling == ISL_TILING_Y0);
560 
561       /*
562        * IvyBrigde PRM Vol 2, Part 1, "11.7 MCS Buffer for Render Target(s)":
563        *
564        * Height, width, and layout of MCS buffer in this case must match with
565        * Render Target height, width, and layout. MCS buffer is tiledY.
566        *
567        * To avoid wasting memory, choose the smallest alignment possible:
568        * HALIGN_4 and VALIGN_4.
569        */
570       *image_align_el = isl_extent3d(4, 4, 1);
571       return;
572    } else if (info->format == ISL_FORMAT_HIZ) {
573       assert(ISL_DEV_GEN(dev) >= 6);
574       if (ISL_DEV_GEN(dev) == 6) {
575          /* HiZ surfaces on Sandy Bridge are packed tightly. */
576          *image_align_el = isl_extent3d(1, 1, 1);
577       } else {
578          /* On gen7+, HiZ surfaces are always aligned to 16x8 pixels in the
579           * primary surface which works out to 2x2 HiZ elments.
580           */
581          *image_align_el = isl_extent3d(2, 2, 1);
582       }
583       return;
584    }
585 
586    if (ISL_DEV_GEN(dev) >= 9) {
587       isl_gen9_choose_image_alignment_el(dev, info, tiling, dim_layout,
588                                          msaa_layout, image_align_el);
589    } else if (ISL_DEV_GEN(dev) >= 8) {
590       isl_gen8_choose_image_alignment_el(dev, info, tiling, dim_layout,
591                                          msaa_layout, image_align_el);
592    } else if (ISL_DEV_GEN(dev) >= 7) {
593       isl_gen7_choose_image_alignment_el(dev, info, tiling, dim_layout,
594                                           msaa_layout, image_align_el);
595    } else if (ISL_DEV_GEN(dev) >= 6) {
596       isl_gen6_choose_image_alignment_el(dev, info, tiling, dim_layout,
597                                          msaa_layout, image_align_el);
598    } else {
599       isl_gen4_choose_image_alignment_el(dev, info, tiling, dim_layout,
600                                          msaa_layout, image_align_el);
601    }
602 }
603 
604 static enum isl_dim_layout
isl_surf_choose_dim_layout(const struct isl_device * dev,enum isl_surf_dim logical_dim,enum isl_tiling tiling,isl_surf_usage_flags_t usage)605 isl_surf_choose_dim_layout(const struct isl_device *dev,
606                            enum isl_surf_dim logical_dim,
607                            enum isl_tiling tiling,
608                            isl_surf_usage_flags_t usage)
609 {
610    /* Sandy bridge needs a special layout for HiZ and stencil. */
611    if (ISL_DEV_GEN(dev) == 6 &&
612        (tiling == ISL_TILING_W || tiling == ISL_TILING_HIZ))
613       return ISL_DIM_LAYOUT_GEN6_STENCIL_HIZ;
614 
615    if (ISL_DEV_GEN(dev) >= 9) {
616       switch (logical_dim) {
617       case ISL_SURF_DIM_1D:
618          /* From the Sky Lake PRM Vol. 5, "1D Surfaces":
619           *
620           *    One-dimensional surfaces use a tiling mode of linear.
621           *    Technically, they are not tiled resources, but the Tiled
622           *    Resource Mode field in RENDER_SURFACE_STATE is still used to
623           *    indicate the alignment requirements for this linear surface
624           *    (See 1D Alignment requirements for how 4K and 64KB Tiled
625           *    Resource Modes impact alignment). Alternatively, a 1D surface
626           *    can be defined as a 2D tiled surface (e.g. TileY or TileX) with
627           *    a height of 0.
628           *
629           * In other words, ISL_DIM_LAYOUT_GEN9_1D is only used for linear
630           * surfaces and, for tiled surfaces, ISL_DIM_LAYOUT_GEN4_2D is used.
631           */
632          if (tiling == ISL_TILING_LINEAR)
633             return ISL_DIM_LAYOUT_GEN9_1D;
634          else
635             return ISL_DIM_LAYOUT_GEN4_2D;
636       case ISL_SURF_DIM_2D:
637       case ISL_SURF_DIM_3D:
638          return ISL_DIM_LAYOUT_GEN4_2D;
639       }
640    } else {
641       switch (logical_dim) {
642       case ISL_SURF_DIM_1D:
643       case ISL_SURF_DIM_2D:
644          /* From the G45 PRM Vol. 1a, "6.17.4.1 Hardware Cube Map Layout":
645           *
646           * The cube face textures are stored in the same way as 3D surfaces
647           * are stored (see section 6.17.5 for details).  For cube surfaces,
648           * however, the depth is equal to the number of faces (always 6) and
649           * is not reduced for each MIP.
650           */
651          if (ISL_DEV_GEN(dev) == 4 && (usage & ISL_SURF_USAGE_CUBE_BIT))
652             return ISL_DIM_LAYOUT_GEN4_3D;
653 
654          return ISL_DIM_LAYOUT_GEN4_2D;
655       case ISL_SURF_DIM_3D:
656          return ISL_DIM_LAYOUT_GEN4_3D;
657       }
658    }
659 
660    unreachable("bad isl_surf_dim");
661    return ISL_DIM_LAYOUT_GEN4_2D;
662 }
663 
664 /**
665  * Calculate the physical extent of the surface's first level, in units of
666  * surface samples. The result is aligned to the format's compression block.
667  */
668 static void
isl_calc_phys_level0_extent_sa(const struct isl_device * dev,const struct isl_surf_init_info * restrict info,enum isl_dim_layout dim_layout,enum isl_tiling tiling,enum isl_msaa_layout msaa_layout,struct isl_extent4d * phys_level0_sa)669 isl_calc_phys_level0_extent_sa(const struct isl_device *dev,
670                                const struct isl_surf_init_info *restrict info,
671                                enum isl_dim_layout dim_layout,
672                                enum isl_tiling tiling,
673                                enum isl_msaa_layout msaa_layout,
674                                struct isl_extent4d *phys_level0_sa)
675 {
676    const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
677 
678    if (isl_format_is_yuv(info->format))
679       isl_finishme("%s:%s: YUV format", __FILE__, __func__);
680 
681    switch (info->dim) {
682    case ISL_SURF_DIM_1D:
683       assert(info->height == 1);
684       assert(info->depth == 1);
685       assert(info->samples == 1);
686 
687       switch (dim_layout) {
688       case ISL_DIM_LAYOUT_GEN4_3D:
689          unreachable("bad isl_dim_layout");
690 
691       case ISL_DIM_LAYOUT_GEN9_1D:
692       case ISL_DIM_LAYOUT_GEN4_2D:
693       case ISL_DIM_LAYOUT_GEN6_STENCIL_HIZ:
694          *phys_level0_sa = (struct isl_extent4d) {
695             .w = isl_align_npot(info->width, fmtl->bw),
696             .h = fmtl->bh,
697             .d = 1,
698             .a = info->array_len,
699          };
700          break;
701       }
702       break;
703 
704    case ISL_SURF_DIM_2D:
705       if (ISL_DEV_GEN(dev) == 4 && (info->usage & ISL_SURF_USAGE_CUBE_BIT))
706          assert(dim_layout == ISL_DIM_LAYOUT_GEN4_3D);
707       else
708          assert(dim_layout == ISL_DIM_LAYOUT_GEN4_2D ||
709                 dim_layout == ISL_DIM_LAYOUT_GEN6_STENCIL_HIZ);
710 
711       if (tiling == ISL_TILING_Ys && info->samples > 1)
712          isl_finishme("%s:%s: multisample TileYs layout", __FILE__, __func__);
713 
714       switch (msaa_layout) {
715       case ISL_MSAA_LAYOUT_NONE:
716          assert(info->depth == 1);
717          assert(info->samples == 1);
718 
719          *phys_level0_sa = (struct isl_extent4d) {
720             .w = isl_align_npot(info->width, fmtl->bw),
721             .h = isl_align_npot(info->height, fmtl->bh),
722             .d = 1,
723             .a = info->array_len,
724          };
725          break;
726 
727       case ISL_MSAA_LAYOUT_ARRAY:
728          assert(info->depth == 1);
729          assert(info->levels == 1);
730          assert(isl_format_supports_multisampling(dev->info, info->format));
731          assert(fmtl->bw == 1 && fmtl->bh == 1);
732 
733          *phys_level0_sa = (struct isl_extent4d) {
734             .w = info->width,
735             .h = info->height,
736             .d = 1,
737             .a = info->array_len * info->samples,
738          };
739          break;
740 
741       case ISL_MSAA_LAYOUT_INTERLEAVED:
742          assert(info->depth == 1);
743          assert(info->levels == 1);
744          assert(isl_format_supports_multisampling(dev->info, info->format));
745 
746          *phys_level0_sa = (struct isl_extent4d) {
747             .w = info->width,
748             .h = info->height,
749             .d = 1,
750             .a = info->array_len,
751          };
752 
753          isl_msaa_interleaved_scale_px_to_sa(info->samples,
754                                              &phys_level0_sa->w,
755                                              &phys_level0_sa->h);
756 
757          phys_level0_sa->w = isl_align(phys_level0_sa->w, fmtl->bw);
758          phys_level0_sa->h = isl_align(phys_level0_sa->h, fmtl->bh);
759          break;
760       }
761       break;
762 
763    case ISL_SURF_DIM_3D:
764       assert(info->array_len == 1);
765       assert(info->samples == 1);
766 
767       if (fmtl->bd > 1) {
768          isl_finishme("%s:%s: compression block with depth > 1",
769                       __FILE__, __func__);
770       }
771 
772       switch (dim_layout) {
773       case ISL_DIM_LAYOUT_GEN9_1D:
774       case ISL_DIM_LAYOUT_GEN6_STENCIL_HIZ:
775          unreachable("bad isl_dim_layout");
776 
777       case ISL_DIM_LAYOUT_GEN4_2D:
778          assert(ISL_DEV_GEN(dev) >= 9);
779 
780          *phys_level0_sa = (struct isl_extent4d) {
781             .w = isl_align_npot(info->width, fmtl->bw),
782             .h = isl_align_npot(info->height, fmtl->bh),
783             .d = 1,
784             .a = info->depth,
785          };
786          break;
787 
788       case ISL_DIM_LAYOUT_GEN4_3D:
789          assert(ISL_DEV_GEN(dev) < 9);
790          *phys_level0_sa = (struct isl_extent4d) {
791             .w = isl_align(info->width, fmtl->bw),
792             .h = isl_align(info->height, fmtl->bh),
793             .d = info->depth,
794             .a = 1,
795          };
796          break;
797       }
798       break;
799    }
800 }
801 
802 /**
803  * Calculate the pitch between physical array slices, in units of rows of
804  * surface elements.
805  */
806 static uint32_t
isl_calc_array_pitch_el_rows_gen4_2d(const struct isl_device * dev,const struct isl_surf_init_info * restrict info,const struct isl_tile_info * tile_info,const struct isl_extent3d * image_align_sa,const struct isl_extent4d * phys_level0_sa,enum isl_array_pitch_span array_pitch_span,const struct isl_extent2d * phys_slice0_sa)807 isl_calc_array_pitch_el_rows_gen4_2d(
808       const struct isl_device *dev,
809       const struct isl_surf_init_info *restrict info,
810       const struct isl_tile_info *tile_info,
811       const struct isl_extent3d *image_align_sa,
812       const struct isl_extent4d *phys_level0_sa,
813       enum isl_array_pitch_span array_pitch_span,
814       const struct isl_extent2d *phys_slice0_sa)
815 {
816    const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
817    uint32_t pitch_sa_rows = 0;
818 
819    switch (array_pitch_span) {
820    case ISL_ARRAY_PITCH_SPAN_COMPACT:
821       pitch_sa_rows = isl_align_npot(phys_slice0_sa->h, image_align_sa->h);
822       break;
823    case ISL_ARRAY_PITCH_SPAN_FULL: {
824       /* The QPitch equation is found in the Broadwell PRM >> Volume 5:
825        * Memory Views >> Common Surface Formats >> Surface Layout >> 2D
826        * Surfaces >> Surface Arrays.
827        */
828       uint32_t H0_sa = phys_level0_sa->h;
829       uint32_t H1_sa = isl_minify(H0_sa, 1);
830 
831       uint32_t h0_sa = isl_align_npot(H0_sa, image_align_sa->h);
832       uint32_t h1_sa = isl_align_npot(H1_sa, image_align_sa->h);
833 
834       uint32_t m;
835       if (ISL_DEV_GEN(dev) >= 7) {
836          /* The QPitch equation changed slightly in Ivybridge. */
837          m = 12;
838       } else {
839          m = 11;
840       }
841 
842       pitch_sa_rows = h0_sa + h1_sa + (m * image_align_sa->h);
843 
844       if (ISL_DEV_GEN(dev) == 6 && info->samples > 1 &&
845           (info->height % 4 == 1)) {
846          /* [SNB] Errata from the Sandy Bridge PRM >> Volume 4 Part 1:
847           * Graphics Core >> Section 7.18.3.7: Surface Arrays:
848           *
849           *    [SNB] Errata: Sampler MSAA Qpitch will be 4 greater than
850           *    the value calculated in the equation above , for every
851           *    other odd Surface Height starting from 1 i.e. 1,5,9,13.
852           *
853           * XXX(chadv): Is the errata natural corollary of the physical
854           * layout of interleaved samples?
855           */
856          pitch_sa_rows += 4;
857       }
858 
859       pitch_sa_rows = isl_align_npot(pitch_sa_rows, fmtl->bh);
860       } /* end case */
861       break;
862    }
863 
864    assert(pitch_sa_rows % fmtl->bh == 0);
865    uint32_t pitch_el_rows = pitch_sa_rows / fmtl->bh;
866 
867    if (ISL_DEV_GEN(dev) >= 9 && fmtl->txc == ISL_TXC_CCS) {
868       /*
869        * From the Sky Lake PRM Vol 7, "MCS Buffer for Render Target(s)" (p. 632):
870        *
871        *    "Mip-mapped and arrayed surfaces are supported with MCS buffer
872        *    layout with these alignments in the RT space: Horizontal
873        *    Alignment = 128 and Vertical Alignment = 64."
874        *
875        * From the Sky Lake PRM Vol. 2d, "RENDER_SURFACE_STATE" (p. 435):
876        *
877        *    "For non-multisampled render target's CCS auxiliary surface,
878        *    QPitch must be computed with Horizontal Alignment = 128 and
879        *    Surface Vertical Alignment = 256. These alignments are only for
880        *    CCS buffer and not for associated render target."
881        *
882        * The first restriction is already handled by isl_choose_image_alignment_el
883        * but the second restriction, which is an extension of the first, only
884        * applies to qpitch and must be applied here.
885        */
886       assert(fmtl->bh == 4);
887       pitch_el_rows = isl_align(pitch_el_rows, 256 / 4);
888    }
889 
890    if (ISL_DEV_GEN(dev) >= 9 &&
891        info->dim == ISL_SURF_DIM_3D &&
892        tile_info->tiling != ISL_TILING_LINEAR) {
893       /* From the Skylake BSpec >> RENDER_SURFACE_STATE >> Surface QPitch:
894        *
895        *    Tile Mode != Linear: This field must be set to an integer multiple
896        *    of the tile height
897        */
898       pitch_el_rows = isl_align(pitch_el_rows, tile_info->logical_extent_el.height);
899    }
900 
901    return pitch_el_rows;
902 }
903 
904 /**
905  * A variant of isl_calc_phys_slice0_extent_sa() specific to
906  * ISL_DIM_LAYOUT_GEN4_2D.
907  */
908 static void
isl_calc_phys_slice0_extent_sa_gen4_2d(const struct isl_device * dev,const struct isl_surf_init_info * restrict info,enum isl_msaa_layout msaa_layout,const struct isl_extent3d * image_align_sa,const struct isl_extent4d * phys_level0_sa,struct isl_extent2d * phys_slice0_sa)909 isl_calc_phys_slice0_extent_sa_gen4_2d(
910       const struct isl_device *dev,
911       const struct isl_surf_init_info *restrict info,
912       enum isl_msaa_layout msaa_layout,
913       const struct isl_extent3d *image_align_sa,
914       const struct isl_extent4d *phys_level0_sa,
915       struct isl_extent2d *phys_slice0_sa)
916 {
917    const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
918 
919    assert(phys_level0_sa->depth == 1);
920 
921    if (info->levels == 1) {
922       /* Do not pad the surface to the image alignment. Instead, pad it only
923        * to the pixel format's block alignment.
924        *
925        * For tiled surfaces, using a reduced alignment here avoids wasting CPU
926        * cycles on the below mipmap layout caluclations. Reducing the
927        * alignment here is safe because we later align the row pitch and array
928        * pitch to the tile boundary. It is safe even for
929        * ISL_MSAA_LAYOUT_INTERLEAVED, because phys_level0_sa is already scaled
930        * to accomodate the interleaved samples.
931        *
932        * For linear surfaces, reducing the alignment here permits us to later
933        * choose an arbitrary, non-aligned row pitch. If the surface backs
934        * a VkBuffer, then an arbitrary pitch may be needed to accomodate
935        * VkBufferImageCopy::bufferRowLength.
936        */
937       *phys_slice0_sa = (struct isl_extent2d) {
938          .w = isl_align_npot(phys_level0_sa->w, fmtl->bw),
939          .h = isl_align_npot(phys_level0_sa->h, fmtl->bh),
940       };
941       return;
942    }
943 
944    uint32_t slice_top_w = 0;
945    uint32_t slice_bottom_w = 0;
946    uint32_t slice_left_h = 0;
947    uint32_t slice_right_h = 0;
948 
949    uint32_t W0 = phys_level0_sa->w;
950    uint32_t H0 = phys_level0_sa->h;
951 
952    for (uint32_t l = 0; l < info->levels; ++l) {
953       uint32_t W = isl_minify(W0, l);
954       uint32_t H = isl_minify(H0, l);
955 
956       uint32_t w = isl_align_npot(W, image_align_sa->w);
957       uint32_t h = isl_align_npot(H, image_align_sa->h);
958 
959       if (l == 0) {
960          slice_top_w = w;
961          slice_left_h = h;
962          slice_right_h = h;
963       } else if (l == 1) {
964          slice_bottom_w = w;
965          slice_left_h += h;
966       } else if (l == 2) {
967          slice_bottom_w += w;
968          slice_right_h += h;
969       } else {
970          slice_right_h += h;
971       }
972    }
973 
974    *phys_slice0_sa = (struct isl_extent2d) {
975       .w = MAX(slice_top_w, slice_bottom_w),
976       .h = MAX(slice_left_h, slice_right_h),
977    };
978 }
979 
980 static void
isl_calc_phys_total_extent_el_gen4_2d(const struct isl_device * dev,const struct isl_surf_init_info * restrict info,const struct isl_tile_info * tile_info,enum isl_msaa_layout msaa_layout,const struct isl_extent3d * image_align_sa,const struct isl_extent4d * phys_level0_sa,enum isl_array_pitch_span array_pitch_span,uint32_t * array_pitch_el_rows,struct isl_extent2d * total_extent_el)981 isl_calc_phys_total_extent_el_gen4_2d(
982       const struct isl_device *dev,
983       const struct isl_surf_init_info *restrict info,
984       const struct isl_tile_info *tile_info,
985       enum isl_msaa_layout msaa_layout,
986       const struct isl_extent3d *image_align_sa,
987       const struct isl_extent4d *phys_level0_sa,
988       enum isl_array_pitch_span array_pitch_span,
989       uint32_t *array_pitch_el_rows,
990       struct isl_extent2d *total_extent_el)
991 {
992    const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
993 
994    struct isl_extent2d phys_slice0_sa;
995    isl_calc_phys_slice0_extent_sa_gen4_2d(dev, info, msaa_layout,
996                                           image_align_sa, phys_level0_sa,
997                                           &phys_slice0_sa);
998    *array_pitch_el_rows =
999       isl_calc_array_pitch_el_rows_gen4_2d(dev, info, tile_info,
1000                                            image_align_sa, phys_level0_sa,
1001                                            array_pitch_span,
1002                                            &phys_slice0_sa);
1003    *total_extent_el = (struct isl_extent2d) {
1004       .w = isl_assert_div(phys_slice0_sa.w, fmtl->bw),
1005       .h = *array_pitch_el_rows * (phys_level0_sa->array_len - 1) +
1006            isl_assert_div(phys_slice0_sa.h, fmtl->bh),
1007    };
1008 }
1009 
1010 /**
1011  * A variant of isl_calc_phys_slice0_extent_sa() specific to
1012  * ISL_DIM_LAYOUT_GEN4_3D.
1013  */
1014 static void
isl_calc_phys_total_extent_el_gen4_3d(const struct isl_device * dev,const struct isl_surf_init_info * restrict info,const struct isl_extent3d * image_align_sa,const struct isl_extent4d * phys_level0_sa,uint32_t * array_pitch_el_rows,struct isl_extent2d * phys_total_el)1015 isl_calc_phys_total_extent_el_gen4_3d(
1016       const struct isl_device *dev,
1017       const struct isl_surf_init_info *restrict info,
1018       const struct isl_extent3d *image_align_sa,
1019       const struct isl_extent4d *phys_level0_sa,
1020       uint32_t *array_pitch_el_rows,
1021       struct isl_extent2d *phys_total_el)
1022 {
1023    const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
1024 
1025    assert(info->samples == 1);
1026 
1027    if (info->dim != ISL_SURF_DIM_3D) {
1028       /* From the G45 PRM Vol. 1a, "6.17.4.1 Hardware Cube Map Layout":
1029        *
1030        * The cube face textures are stored in the same way as 3D surfaces
1031        * are stored (see section 6.17.5 for details).  For cube surfaces,
1032        * however, the depth is equal to the number of faces (always 6) and
1033        * is not reduced for each MIP.
1034        */
1035       assert(ISL_DEV_GEN(dev) == 4);
1036       assert(info->usage & ISL_SURF_USAGE_CUBE_BIT);
1037       assert(phys_level0_sa->array_len == 6);
1038    } else {
1039       assert(phys_level0_sa->array_len == 1);
1040    }
1041 
1042    uint32_t total_w = 0;
1043    uint32_t total_h = 0;
1044 
1045    uint32_t W0 = phys_level0_sa->w;
1046    uint32_t H0 = phys_level0_sa->h;
1047    uint32_t D0 = phys_level0_sa->d;
1048    uint32_t A0 = phys_level0_sa->a;
1049 
1050    for (uint32_t l = 0; l < info->levels; ++l) {
1051       uint32_t level_w = isl_align_npot(isl_minify(W0, l), image_align_sa->w);
1052       uint32_t level_h = isl_align_npot(isl_minify(H0, l), image_align_sa->h);
1053       uint32_t level_d = info->dim == ISL_SURF_DIM_3D ? isl_minify(D0, l) : A0;
1054 
1055       uint32_t max_layers_horiz = MIN(level_d, 1u << l);
1056       uint32_t max_layers_vert = isl_align(level_d, 1u << l) / (1u << l);
1057 
1058       total_w = MAX(total_w, level_w * max_layers_horiz);
1059       total_h += level_h * max_layers_vert;
1060    }
1061 
1062    /* GEN4_3D layouts don't really have an array pitch since each LOD has a
1063     * different number of horizontal and vertical layers.  We have to set it
1064     * to something, so at least make it true for LOD0.
1065     */
1066    *array_pitch_el_rows =
1067       isl_align_npot(phys_level0_sa->h, image_align_sa->h) / fmtl->bw;
1068    *phys_total_el = (struct isl_extent2d) {
1069       .w = isl_assert_div(total_w, fmtl->bw),
1070       .h = isl_assert_div(total_h, fmtl->bh),
1071    };
1072 }
1073 
1074 /**
1075  * A variant of isl_calc_phys_slice0_extent_sa() specific to
1076  * ISL_DIM_LAYOUT_GEN6_STENCIL_HIZ.
1077  */
1078 static void
isl_calc_phys_total_extent_el_gen6_stencil_hiz(const struct isl_device * dev,const struct isl_surf_init_info * restrict info,const struct isl_tile_info * tile_info,const struct isl_extent3d * image_align_sa,const struct isl_extent4d * phys_level0_sa,uint32_t * array_pitch_el_rows,struct isl_extent2d * phys_total_el)1079 isl_calc_phys_total_extent_el_gen6_stencil_hiz(
1080       const struct isl_device *dev,
1081       const struct isl_surf_init_info *restrict info,
1082       const struct isl_tile_info *tile_info,
1083       const struct isl_extent3d *image_align_sa,
1084       const struct isl_extent4d *phys_level0_sa,
1085       uint32_t *array_pitch_el_rows,
1086       struct isl_extent2d *phys_total_el)
1087 {
1088    const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
1089 
1090    const struct isl_extent2d tile_extent_sa = {
1091       .w = tile_info->logical_extent_el.w * fmtl->bw,
1092       .h = tile_info->logical_extent_el.h * fmtl->bh,
1093    };
1094    /* Tile size is a multiple of image alignment */
1095    assert(tile_extent_sa.w % image_align_sa->w == 0);
1096    assert(tile_extent_sa.h % image_align_sa->h == 0);
1097 
1098    const uint32_t W0 = phys_level0_sa->w;
1099    const uint32_t H0 = phys_level0_sa->h;
1100 
1101    /* Each image has the same height as LOD0 because the hardware thinks
1102     * everything is LOD0
1103     */
1104    const uint32_t H = isl_align(H0, image_align_sa->h) * phys_level0_sa->a;
1105 
1106    uint32_t total_top_w = 0;
1107    uint32_t total_bottom_w = 0;
1108    uint32_t total_h = 0;
1109 
1110    for (uint32_t l = 0; l < info->levels; ++l) {
1111       const uint32_t W = isl_minify(W0, l);
1112 
1113       const uint32_t w = isl_align(W, tile_extent_sa.w);
1114       const uint32_t h = isl_align(H, tile_extent_sa.h);
1115 
1116       if (l == 0) {
1117          total_top_w = w;
1118          total_h = h;
1119       } else if (l == 1) {
1120          total_bottom_w = w;
1121          total_h += h;
1122       } else {
1123          total_bottom_w += w;
1124       }
1125    }
1126 
1127    *array_pitch_el_rows =
1128       isl_assert_div(isl_align(H0, image_align_sa->h), fmtl->bh);
1129    *phys_total_el = (struct isl_extent2d) {
1130       .w = isl_assert_div(MAX(total_top_w, total_bottom_w), fmtl->bw),
1131       .h = isl_assert_div(total_h, fmtl->bh),
1132    };
1133 }
1134 
1135 /**
1136  * A variant of isl_calc_phys_slice0_extent_sa() specific to
1137  * ISL_DIM_LAYOUT_GEN9_1D.
1138  */
1139 static void
isl_calc_phys_total_extent_el_gen9_1d(const struct isl_device * dev,const struct isl_surf_init_info * restrict info,const struct isl_extent3d * image_align_sa,const struct isl_extent4d * phys_level0_sa,uint32_t * array_pitch_el_rows,struct isl_extent2d * phys_total_el)1140 isl_calc_phys_total_extent_el_gen9_1d(
1141       const struct isl_device *dev,
1142       const struct isl_surf_init_info *restrict info,
1143       const struct isl_extent3d *image_align_sa,
1144       const struct isl_extent4d *phys_level0_sa,
1145       uint32_t *array_pitch_el_rows,
1146       struct isl_extent2d *phys_total_el)
1147 {
1148    MAYBE_UNUSED const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
1149 
1150    assert(phys_level0_sa->height / fmtl->bh == 1);
1151    assert(phys_level0_sa->depth == 1);
1152    assert(info->samples == 1);
1153    assert(image_align_sa->w >= fmtl->bw);
1154 
1155    uint32_t slice_w = 0;
1156    const uint32_t W0 = phys_level0_sa->w;
1157 
1158    for (uint32_t l = 0; l < info->levels; ++l) {
1159       uint32_t W = isl_minify(W0, l);
1160       uint32_t w = isl_align_npot(W, image_align_sa->w);
1161 
1162       slice_w += w;
1163    }
1164 
1165    *array_pitch_el_rows = 1;
1166    *phys_total_el = (struct isl_extent2d) {
1167       .w = isl_assert_div(slice_w, fmtl->bw),
1168       .h = phys_level0_sa->array_len,
1169    };
1170 }
1171 
1172 /**
1173  * Calculate the two-dimensional total physical extent of the surface, in
1174  * units of surface elements.
1175  */
1176 static void
isl_calc_phys_total_extent_el(const struct isl_device * dev,const struct isl_surf_init_info * restrict info,const struct isl_tile_info * tile_info,enum isl_dim_layout dim_layout,enum isl_msaa_layout msaa_layout,const struct isl_extent3d * image_align_sa,const struct isl_extent4d * phys_level0_sa,enum isl_array_pitch_span array_pitch_span,uint32_t * array_pitch_el_rows,struct isl_extent2d * total_extent_el)1177 isl_calc_phys_total_extent_el(const struct isl_device *dev,
1178                               const struct isl_surf_init_info *restrict info,
1179                               const struct isl_tile_info *tile_info,
1180                               enum isl_dim_layout dim_layout,
1181                               enum isl_msaa_layout msaa_layout,
1182                               const struct isl_extent3d *image_align_sa,
1183                               const struct isl_extent4d *phys_level0_sa,
1184                               enum isl_array_pitch_span array_pitch_span,
1185                               uint32_t *array_pitch_el_rows,
1186                               struct isl_extent2d *total_extent_el)
1187 {
1188    switch (dim_layout) {
1189    case ISL_DIM_LAYOUT_GEN9_1D:
1190       assert(array_pitch_span == ISL_ARRAY_PITCH_SPAN_COMPACT);
1191       isl_calc_phys_total_extent_el_gen9_1d(dev, info,
1192                                             image_align_sa, phys_level0_sa,
1193                                             array_pitch_el_rows,
1194                                             total_extent_el);
1195       return;
1196    case ISL_DIM_LAYOUT_GEN4_2D:
1197       isl_calc_phys_total_extent_el_gen4_2d(dev, info, tile_info, msaa_layout,
1198                                             image_align_sa, phys_level0_sa,
1199                                             array_pitch_span,
1200                                             array_pitch_el_rows,
1201                                             total_extent_el);
1202       return;
1203    case ISL_DIM_LAYOUT_GEN6_STENCIL_HIZ:
1204       assert(array_pitch_span == ISL_ARRAY_PITCH_SPAN_COMPACT);
1205       isl_calc_phys_total_extent_el_gen6_stencil_hiz(dev, info, tile_info,
1206                                                      image_align_sa,
1207                                                      phys_level0_sa,
1208                                                      array_pitch_el_rows,
1209                                                      total_extent_el);
1210       return;
1211    case ISL_DIM_LAYOUT_GEN4_3D:
1212       assert(array_pitch_span == ISL_ARRAY_PITCH_SPAN_COMPACT);
1213       isl_calc_phys_total_extent_el_gen4_3d(dev, info,
1214                                             image_align_sa, phys_level0_sa,
1215                                             array_pitch_el_rows,
1216                                             total_extent_el);
1217       return;
1218    }
1219 }
1220 
1221 static uint32_t
isl_calc_row_pitch_alignment(const struct isl_surf_init_info * surf_info,const struct isl_tile_info * tile_info)1222 isl_calc_row_pitch_alignment(const struct isl_surf_init_info *surf_info,
1223                              const struct isl_tile_info *tile_info)
1224 {
1225    if (tile_info->tiling != ISL_TILING_LINEAR)
1226       return tile_info->phys_extent_B.width;
1227 
1228    /* From the Broadwel PRM >> Volume 2d: Command Reference: Structures >>
1229     * RENDER_SURFACE_STATE Surface Pitch (p349):
1230     *
1231     *    - For linear render target surfaces and surfaces accessed with the
1232     *      typed data port messages, the pitch must be a multiple of the
1233     *      element size for non-YUV surface formats.  Pitch must be
1234     *      a multiple of 2 * element size for YUV surface formats.
1235     *
1236     *    - [Requirements for SURFTYPE_BUFFER and SURFTYPE_STRBUF, which we
1237     *      ignore because isl doesn't do buffers.]
1238     *
1239     *    - For other linear surfaces, the pitch can be any multiple of
1240     *      bytes.
1241     */
1242    const struct isl_format_layout *fmtl = isl_format_get_layout(surf_info->format);
1243    const uint32_t bs = fmtl->bpb / 8;
1244 
1245    if (surf_info->usage & ISL_SURF_USAGE_RENDER_TARGET_BIT) {
1246       if (isl_format_is_yuv(surf_info->format)) {
1247          return 2 * bs;
1248       } else  {
1249          return bs;
1250       }
1251    }
1252 
1253    return 1;
1254 }
1255 
1256 static uint32_t
isl_calc_linear_min_row_pitch(const struct isl_device * dev,const struct isl_surf_init_info * info,const struct isl_extent2d * phys_total_el,uint32_t alignment)1257 isl_calc_linear_min_row_pitch(const struct isl_device *dev,
1258                               const struct isl_surf_init_info *info,
1259                               const struct isl_extent2d *phys_total_el,
1260                               uint32_t alignment)
1261 {
1262    const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
1263    const uint32_t bs = fmtl->bpb / 8;
1264 
1265    return isl_align_npot(bs * phys_total_el->w, alignment);
1266 }
1267 
1268 static uint32_t
isl_calc_tiled_min_row_pitch(const struct isl_device * dev,const struct isl_surf_init_info * surf_info,const struct isl_tile_info * tile_info,const struct isl_extent2d * phys_total_el,uint32_t alignment)1269 isl_calc_tiled_min_row_pitch(const struct isl_device *dev,
1270                              const struct isl_surf_init_info *surf_info,
1271                              const struct isl_tile_info *tile_info,
1272                              const struct isl_extent2d *phys_total_el,
1273                              uint32_t alignment)
1274 {
1275    const struct isl_format_layout *fmtl = isl_format_get_layout(surf_info->format);
1276 
1277    assert(fmtl->bpb % tile_info->format_bpb == 0);
1278 
1279    const uint32_t tile_el_scale = fmtl->bpb / tile_info->format_bpb;
1280    const uint32_t total_w_tl =
1281       isl_align_div(phys_total_el->w * tile_el_scale,
1282                     tile_info->logical_extent_el.width);
1283 
1284    assert(alignment == tile_info->phys_extent_B.width);
1285    return total_w_tl * tile_info->phys_extent_B.width;
1286 }
1287 
1288 static uint32_t
isl_calc_min_row_pitch(const struct isl_device * dev,const struct isl_surf_init_info * surf_info,const struct isl_tile_info * tile_info,const struct isl_extent2d * phys_total_el,uint32_t alignment)1289 isl_calc_min_row_pitch(const struct isl_device *dev,
1290                        const struct isl_surf_init_info *surf_info,
1291                        const struct isl_tile_info *tile_info,
1292                        const struct isl_extent2d *phys_total_el,
1293                        uint32_t alignment)
1294 {
1295    if (tile_info->tiling == ISL_TILING_LINEAR) {
1296       return isl_calc_linear_min_row_pitch(dev, surf_info, phys_total_el,
1297                                            alignment);
1298    } else {
1299       return isl_calc_tiled_min_row_pitch(dev, surf_info, tile_info,
1300                                           phys_total_el, alignment);
1301    }
1302 }
1303 
/**
 * Is pitch `n` in the valid range for a hardware bitfield, if the bitfield's
 * size is `bits` bits?
 *
 * Hardware pitch fields are offset by 1. For example, if the size of
 * RENDER_SURFACE_STATE::SurfacePitch is B bits, then the range of valid
 * pitches is [1, 2^B] inclusive.  If the surface pitch is N, then
 * RENDER_SURFACE_STATE::SurfacePitch must be set to N-1.
 *
 * A field size of 0 bits never accepts any pitch.  A pitch of 0 is a caller
 * error (asserted) and is reported as out of range when assertions are
 * compiled out.
 */
static bool
pitch_in_range(uint32_t n, uint32_t bits)
{
   assert(n != 0);

   if (bits == 0 || n == 0)
      return false;

   /* Every non-zero 32-bit pitch fits in a field of 32 or more bits.  Answer
    * directly: shifting a 32-bit value by 32 or more is undefined behavior.
    */
   if (bits >= 32)
      return true;

   /* Shift an unsigned one so that bits == 31 does not overflow a signed
    * int and the comparison is unsigned on both sides.
    */
   return n <= ((uint32_t) 1 << bits);
}
1319 
1320 static bool
isl_calc_row_pitch(const struct isl_device * dev,const struct isl_surf_init_info * surf_info,const struct isl_tile_info * tile_info,enum isl_dim_layout dim_layout,const struct isl_extent2d * phys_total_el,uint32_t * out_row_pitch)1321 isl_calc_row_pitch(const struct isl_device *dev,
1322                    const struct isl_surf_init_info *surf_info,
1323                    const struct isl_tile_info *tile_info,
1324                    enum isl_dim_layout dim_layout,
1325                    const struct isl_extent2d *phys_total_el,
1326                    uint32_t *out_row_pitch)
1327 {
1328    uint32_t alignment =
1329       isl_calc_row_pitch_alignment(surf_info, tile_info);
1330 
1331    /* If pitch isn't given and it can be chosen freely, align it by cache line
1332     * allowing one to use blit engine on the surface.
1333     */
1334    if (surf_info->row_pitch == 0 && tile_info->tiling == ISL_TILING_LINEAR) {
1335       /* From the Broadwell PRM docs for XY_SRC_COPY_BLT::SourceBaseAddress:
1336        *
1337        *    "Base address of the destination surface: X=0, Y=0. Lower 32bits
1338        *    of the 48bit addressing. When Src Tiling is enabled (Bit_15
1339        *    enabled), this address must be 4KB-aligned. When Tiling is not
1340        *    enabled, this address should be CL (64byte) aligned."
1341        */
1342       alignment = MAX2(alignment, 64);
1343    }
1344 
1345    const uint32_t min_row_pitch =
1346       isl_calc_min_row_pitch(dev, surf_info, tile_info, phys_total_el,
1347                              alignment);
1348 
1349    uint32_t row_pitch = min_row_pitch;
1350 
1351    if (surf_info->row_pitch != 0) {
1352       row_pitch = surf_info->row_pitch;
1353 
1354       if (row_pitch < min_row_pitch)
1355          return false;
1356 
1357       if (row_pitch % alignment != 0)
1358          return false;
1359    }
1360 
1361    const uint32_t row_pitch_tiles = row_pitch / tile_info->phys_extent_B.width;
1362 
1363    if (row_pitch == 0)
1364       return false;
1365 
1366    if (dim_layout == ISL_DIM_LAYOUT_GEN9_1D) {
1367       /* SurfacePitch is ignored for this layout. */
1368       goto done;
1369    }
1370 
1371    if ((surf_info->usage & (ISL_SURF_USAGE_RENDER_TARGET_BIT |
1372                             ISL_SURF_USAGE_TEXTURE_BIT |
1373                             ISL_SURF_USAGE_STORAGE_BIT)) &&
1374        !pitch_in_range(row_pitch, RENDER_SURFACE_STATE_SurfacePitch_bits(dev->info)))
1375       return false;
1376 
1377    if ((surf_info->usage & (ISL_SURF_USAGE_CCS_BIT |
1378                             ISL_SURF_USAGE_MCS_BIT)) &&
1379        !pitch_in_range(row_pitch_tiles, RENDER_SURFACE_STATE_AuxiliarySurfacePitch_bits(dev->info)))
1380       return false;
1381 
1382    if ((surf_info->usage & ISL_SURF_USAGE_DEPTH_BIT) &&
1383        !pitch_in_range(row_pitch, _3DSTATE_DEPTH_BUFFER_SurfacePitch_bits(dev->info)))
1384       return false;
1385 
1386    if ((surf_info->usage & ISL_SURF_USAGE_HIZ_BIT) &&
1387        !pitch_in_range(row_pitch, _3DSTATE_HIER_DEPTH_BUFFER_SurfacePitch_bits(dev->info)))
1388       return false;
1389 
1390    const uint32_t stencil_pitch_bits = dev->use_separate_stencil ?
1391       _3DSTATE_STENCIL_BUFFER_SurfacePitch_bits(dev->info) :
1392       _3DSTATE_DEPTH_BUFFER_SurfacePitch_bits(dev->info);
1393 
1394    if ((surf_info->usage & ISL_SURF_USAGE_STENCIL_BIT) &&
1395        !pitch_in_range(row_pitch, stencil_pitch_bits))
1396       return false;
1397 
1398  done:
1399    *out_row_pitch = row_pitch;
1400    return true;
1401 }
1402 
1403 bool
isl_surf_init_s(const struct isl_device * dev,struct isl_surf * surf,const struct isl_surf_init_info * restrict info)1404 isl_surf_init_s(const struct isl_device *dev,
1405                 struct isl_surf *surf,
1406                 const struct isl_surf_init_info *restrict info)
1407 {
1408    const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
1409 
1410    const struct isl_extent4d logical_level0_px = {
1411       .w = info->width,
1412       .h = info->height,
1413       .d = info->depth,
1414       .a = info->array_len,
1415    };
1416 
1417    enum isl_tiling tiling;
1418    if (!isl_surf_choose_tiling(dev, info, &tiling))
1419       return false;
1420 
1421    struct isl_tile_info tile_info;
1422    isl_tiling_get_info(tiling, fmtl->bpb, &tile_info);
1423 
1424    const enum isl_dim_layout dim_layout =
1425       isl_surf_choose_dim_layout(dev, info->dim, tiling, info->usage);
1426 
1427    enum isl_msaa_layout msaa_layout;
1428    if (!isl_choose_msaa_layout(dev, info, tiling, &msaa_layout))
1429        return false;
1430 
1431    struct isl_extent3d image_align_el;
1432    isl_choose_image_alignment_el(dev, info, tiling, dim_layout, msaa_layout,
1433                                  &image_align_el);
1434 
1435    struct isl_extent3d image_align_sa =
1436       isl_extent3d_el_to_sa(info->format, image_align_el);
1437 
1438    struct isl_extent4d phys_level0_sa;
1439    isl_calc_phys_level0_extent_sa(dev, info, dim_layout, tiling, msaa_layout,
1440                                   &phys_level0_sa);
1441    assert(phys_level0_sa.w % fmtl->bw == 0);
1442    assert(phys_level0_sa.h % fmtl->bh == 0);
1443 
1444    enum isl_array_pitch_span array_pitch_span =
1445       isl_choose_array_pitch_span(dev, info, dim_layout, &phys_level0_sa);
1446 
1447    uint32_t array_pitch_el_rows;
1448    struct isl_extent2d phys_total_el;
1449    isl_calc_phys_total_extent_el(dev, info, &tile_info,
1450                                  dim_layout, msaa_layout,
1451                                  &image_align_sa, &phys_level0_sa,
1452                                  array_pitch_span, &array_pitch_el_rows,
1453                                  &phys_total_el);
1454 
1455    uint32_t row_pitch;
1456    if (!isl_calc_row_pitch(dev, info, &tile_info, dim_layout,
1457                            &phys_total_el, &row_pitch))
1458       return false;
1459 
1460    uint32_t base_alignment;
1461    uint64_t size;
1462    if (tiling == ISL_TILING_LINEAR) {
1463       size = (uint64_t) row_pitch * phys_total_el.h;
1464 
1465       /* From the Broadwell PRM Vol 2d, RENDER_SURFACE_STATE::SurfaceBaseAddress:
1466        *
1467        *    "The Base Address for linear render target surfaces and surfaces
1468        *    accessed with the typed surface read/write data port messages must
1469        *    be element-size aligned, for non-YUV surface formats, or a
1470        *    multiple of 2 element-sizes for YUV surface formats. Other linear
1471        *    surfaces have no alignment requirements (byte alignment is
1472        *    sufficient.)"
1473        */
1474       base_alignment = MAX(1, info->min_alignment);
1475       if (info->usage & ISL_SURF_USAGE_RENDER_TARGET_BIT) {
1476          if (isl_format_is_yuv(info->format)) {
1477             base_alignment = MAX(base_alignment, fmtl->bpb / 4);
1478          } else {
1479             base_alignment = MAX(base_alignment, fmtl->bpb / 8);
1480          }
1481       }
1482       base_alignment = isl_round_up_to_power_of_two(base_alignment);
1483    } else {
1484       const uint32_t total_h_tl =
1485          isl_align_div(phys_total_el.h, tile_info.logical_extent_el.height);
1486 
1487       size = (uint64_t) total_h_tl * tile_info.phys_extent_B.height * row_pitch;
1488 
1489       const uint32_t tile_size = tile_info.phys_extent_B.width *
1490                                  tile_info.phys_extent_B.height;
1491       assert(isl_is_pow2(info->min_alignment) && isl_is_pow2(tile_size));
1492       base_alignment = MAX(info->min_alignment, tile_size);
1493    }
1494 
1495    if (ISL_DEV_GEN(dev) < 9) {
1496       /* From the Broadwell PRM Vol 5, Surface Layout:
1497        *
1498        *    "In addition to restrictions on maximum height, width, and depth,
1499        *     surfaces are also restricted to a maximum size in bytes. This
1500        *     maximum is 2 GB for all products and all surface types."
1501        *
1502        * This comment is applicable to all Pre-gen9 platforms.
1503        */
1504       if (size > (uint64_t) 1 << 31)
1505          return false;
1506    } else {
1507       /* From the Skylake PRM Vol 5, Maximum Surface Size in Bytes:
1508        *    "In addition to restrictions on maximum height, width, and depth,
1509        *     surfaces are also restricted to a maximum size of 2^38 bytes.
1510        *     All pixels within the surface must be contained within 2^38 bytes
1511        *     of the base address."
1512        */
1513       if (size > (uint64_t) 1 << 38)
1514          return false;
1515    }
1516 
1517    *surf = (struct isl_surf) {
1518       .dim = info->dim,
1519       .dim_layout = dim_layout,
1520       .msaa_layout = msaa_layout,
1521       .tiling = tiling,
1522       .format = info->format,
1523 
1524       .levels = info->levels,
1525       .samples = info->samples,
1526 
1527       .image_alignment_el = image_align_el,
1528       .logical_level0_px = logical_level0_px,
1529       .phys_level0_sa = phys_level0_sa,
1530 
1531       .size = size,
1532       .alignment = base_alignment,
1533       .row_pitch = row_pitch,
1534       .array_pitch_el_rows = array_pitch_el_rows,
1535       .array_pitch_span = array_pitch_span,
1536 
1537       .usage = info->usage,
1538    };
1539 
1540    return true;
1541 }
1542 
1543 void
isl_surf_get_tile_info(const struct isl_surf * surf,struct isl_tile_info * tile_info)1544 isl_surf_get_tile_info(const struct isl_surf *surf,
1545                        struct isl_tile_info *tile_info)
1546 {
1547    const struct isl_format_layout *fmtl = isl_format_get_layout(surf->format);
1548    isl_tiling_get_info(surf->tiling, fmtl->bpb, tile_info);
1549 }
1550 
1551 bool
isl_surf_get_hiz_surf(const struct isl_device * dev,const struct isl_surf * surf,struct isl_surf * hiz_surf)1552 isl_surf_get_hiz_surf(const struct isl_device *dev,
1553                       const struct isl_surf *surf,
1554                       struct isl_surf *hiz_surf)
1555 {
1556    assert(ISL_DEV_GEN(dev) >= 5 && ISL_DEV_USE_SEPARATE_STENCIL(dev));
1557 
1558    /* Multisampled depth is always interleaved */
1559    assert(surf->msaa_layout == ISL_MSAA_LAYOUT_NONE ||
1560           surf->msaa_layout == ISL_MSAA_LAYOUT_INTERLEAVED);
1561 
1562    /* From the Broadwell PRM Vol. 7, "Hierarchical Depth Buffer":
1563     *
1564     *    "The Surface Type, Height, Width, Depth, Minimum Array Element, Render
1565     *    Target View Extent, and Depth Coordinate Offset X/Y of the
1566     *    hierarchical depth buffer are inherited from the depth buffer. The
1567     *    height and width of the hierarchical depth buffer that must be
1568     *    allocated are computed by the following formulas, where HZ is the
1569     *    hierarchical depth buffer and Z is the depth buffer. The Z_Height,
1570     *    Z_Width, and Z_Depth values given in these formulas are those present
1571     *    in 3DSTATE_DEPTH_BUFFER incremented by one.
1572     *
1573     *    "The value of Z_Height and Z_Width must each be multiplied by 2 before
1574     *    being applied to the table below if Number of Multisamples is set to
1575     *    NUMSAMPLES_4. The value of Z_Height must be multiplied by 2 and
1576     *    Z_Width must be multiplied by 4 before being applied to the table
1577     *    below if Number of Multisamples is set to NUMSAMPLES_8."
1578     *
1579     * In the Sky Lake PRM, the second paragraph is replaced with this:
1580     *
1581     *    "The Z_Height and Z_Width values must equal those present in
1582     *    3DSTATE_DEPTH_BUFFER incremented by one."
1583     *
1584     * In other words, on Sandy Bridge through Broadwell, each 128-bit HiZ
1585     * block corresponds to a region of 8x4 samples in the primary depth
1586     * surface.  On Sky Lake, on the other hand, each HiZ block corresponds to
1587     * a region of 8x4 pixels in the primary depth surface regardless of the
1588     * number of samples.  The dimensions of a HiZ block in both pixels and
1589     * samples are given in the table below:
1590     *
1591     *                    | SNB - BDW |     SKL+
1592     *              ------+-----------+-------------
1593     *                1x  |  8 x 4 sa |   8 x 4 sa
1594     *               MSAA |  8 x 4 px |   8 x 4 px
1595     *              ------+-----------+-------------
1596     *                2x  |  8 x 4 sa |  16 x 4 sa
1597     *               MSAA |  4 x 4 px |   8 x 4 px
1598     *              ------+-----------+-------------
1599     *                4x  |  8 x 4 sa |  16 x 8 sa
1600     *               MSAA |  4 x 2 px |   8 x 4 px
1601     *              ------+-----------+-------------
1602     *                8x  |  8 x 4 sa |  32 x 8 sa
1603     *               MSAA |  2 x 2 px |   8 x 4 px
1604     *              ------+-----------+-------------
1605     *               16x  |    N/A    | 32 x 16 sa
1606     *               MSAA |    N/A    |  8 x  4 px
1607     *              ------+-----------+-------------
1608     *
1609     * There are a number of different ways that this discrepency could be
1610     * handled.  The way we have chosen is to simply make MSAA HiZ have the
1611     * same number of samples as the parent surface pre-Sky Lake and always be
1612     * single-sampled on Sky Lake and above.  Since the block sizes of
1613     * compressed formats are given in samples, this neatly handles everything
1614     * without the need for additional HiZ formats with different block sizes
1615     * on SKL+.
1616     */
1617    const unsigned samples = ISL_DEV_GEN(dev) >= 9 ? 1 : surf->samples;
1618 
1619    return isl_surf_init(dev, hiz_surf,
1620                         .dim = surf->dim,
1621                         .format = ISL_FORMAT_HIZ,
1622                         .width = surf->logical_level0_px.width,
1623                         .height = surf->logical_level0_px.height,
1624                         .depth = surf->logical_level0_px.depth,
1625                         .levels = surf->levels,
1626                         .array_len = surf->logical_level0_px.array_len,
1627                         .samples = samples,
1628                         .usage = ISL_SURF_USAGE_HIZ_BIT,
1629                         .tiling_flags = ISL_TILING_HIZ_BIT);
1630 }
1631 
1632 bool
isl_surf_get_mcs_surf(const struct isl_device * dev,const struct isl_surf * surf,struct isl_surf * mcs_surf)1633 isl_surf_get_mcs_surf(const struct isl_device *dev,
1634                       const struct isl_surf *surf,
1635                       struct isl_surf *mcs_surf)
1636 {
1637    /* It must be multisampled with an array layout */
1638    assert(surf->samples > 1 && surf->msaa_layout == ISL_MSAA_LAYOUT_ARRAY);
1639 
1640    /* The following are true of all multisampled surfaces */
1641    assert(surf->dim == ISL_SURF_DIM_2D);
1642    assert(surf->levels == 1);
1643    assert(surf->logical_level0_px.depth == 1);
1644 
1645    /* The "Auxiliary Surface Pitch" field in RENDER_SURFACE_STATE is only 9
1646     * bits which means the maximum pitch of a compression surface is 512
1647     * tiles or 64KB (since MCS is always Y-tiled).  Since a 16x MCS buffer is
1648     * 64bpp, this gives us a maximum width of 8192 pixels.  We can create
1649     * larger multisampled surfaces, we just can't compress them.   For 2x, 4x,
1650     * and 8x, we have enough room for the full 16k supported by the hardware.
1651     */
1652    if (surf->samples == 16 && surf->logical_level0_px.width > 8192)
1653       return false;
1654 
1655    enum isl_format mcs_format;
1656    switch (surf->samples) {
1657    case 2:  mcs_format = ISL_FORMAT_MCS_2X;  break;
1658    case 4:  mcs_format = ISL_FORMAT_MCS_4X;  break;
1659    case 8:  mcs_format = ISL_FORMAT_MCS_8X;  break;
1660    case 16: mcs_format = ISL_FORMAT_MCS_16X; break;
1661    default:
1662       unreachable("Invalid sample count");
1663    }
1664 
1665    return isl_surf_init(dev, mcs_surf,
1666                         .dim = ISL_SURF_DIM_2D,
1667                         .format = mcs_format,
1668                         .width = surf->logical_level0_px.width,
1669                         .height = surf->logical_level0_px.height,
1670                         .depth = 1,
1671                         .levels = 1,
1672                         .array_len = surf->logical_level0_px.array_len,
1673                         .samples = 1, /* MCS surfaces are really single-sampled */
1674                         .usage = ISL_SURF_USAGE_MCS_BIT,
1675                         .tiling_flags = ISL_TILING_Y0_BIT);
1676 }
1677 
1678 bool
isl_surf_get_ccs_surf(const struct isl_device * dev,const struct isl_surf * surf,struct isl_surf * ccs_surf,uint32_t row_pitch)1679 isl_surf_get_ccs_surf(const struct isl_device *dev,
1680                       const struct isl_surf *surf,
1681                       struct isl_surf *ccs_surf,
1682                       uint32_t row_pitch)
1683 {
1684    assert(surf->samples == 1 && surf->msaa_layout == ISL_MSAA_LAYOUT_NONE);
1685    assert(ISL_DEV_GEN(dev) >= 7);
1686 
1687    if (surf->usage & ISL_SURF_USAGE_DISABLE_AUX_BIT)
1688       return false;
1689 
1690    /* The PRM doesn't say this explicitly, but fast-clears don't appear to
1691     * work for 3D textures until gen9 where the layout of 3D textures changes
1692     * to match 2D array textures.
1693     */
1694    if (ISL_DEV_GEN(dev) <= 8 && surf->dim != ISL_SURF_DIM_2D)
1695       return false;
1696 
1697    /* From the HSW PRM Volume 7: 3D-Media-GPGPU, page 652 (Color Clear of
1698     * Non-MultiSampler Render Target Restrictions):
1699     *
1700     *    "Support is for non-mip-mapped and non-array surface types only."
1701     *
1702     * This restriction is lifted on gen8+.  Technically, it may be possible to
1703     * create a CCS for an arrayed or mipmapped image and only enable CCS_D
1704     * when rendering to the base slice.  However, there is no documentation
1705     * tell us what the hardware would do in that case or what it does if you
1706     * walk off the bases slice.  (Does it ignore CCS or does it start
1707     * scribbling over random memory?)  We play it safe and just follow the
1708     * docs and don't allow CCS_D for arrayed or mip-mapped surfaces.
1709     */
1710    if (ISL_DEV_GEN(dev) <= 7 &&
1711        (surf->levels > 1 || surf->logical_level0_px.array_len > 1))
1712       return false;
1713 
1714    if (isl_format_is_compressed(surf->format))
1715       return false;
1716 
1717    /* TODO: More conditions where it can fail. */
1718 
1719    enum isl_format ccs_format;
1720    if (ISL_DEV_GEN(dev) >= 9) {
1721       if (!isl_tiling_is_any_y(surf->tiling))
1722          return false;
1723 
1724       switch (isl_format_get_layout(surf->format)->bpb) {
1725       case 32:    ccs_format = ISL_FORMAT_GEN9_CCS_32BPP;   break;
1726       case 64:    ccs_format = ISL_FORMAT_GEN9_CCS_64BPP;   break;
1727       case 128:   ccs_format = ISL_FORMAT_GEN9_CCS_128BPP;  break;
1728       default:
1729          return false;
1730       }
1731    } else if (surf->tiling == ISL_TILING_Y0) {
1732       switch (isl_format_get_layout(surf->format)->bpb) {
1733       case 32:    ccs_format = ISL_FORMAT_GEN7_CCS_32BPP_Y;    break;
1734       case 64:    ccs_format = ISL_FORMAT_GEN7_CCS_64BPP_Y;    break;
1735       case 128:   ccs_format = ISL_FORMAT_GEN7_CCS_128BPP_Y;   break;
1736       default:
1737          return false;
1738       }
1739    } else if (surf->tiling == ISL_TILING_X) {
1740       switch (isl_format_get_layout(surf->format)->bpb) {
1741       case 32:    ccs_format = ISL_FORMAT_GEN7_CCS_32BPP_X;    break;
1742       case 64:    ccs_format = ISL_FORMAT_GEN7_CCS_64BPP_X;    break;
1743       case 128:   ccs_format = ISL_FORMAT_GEN7_CCS_128BPP_X;   break;
1744       default:
1745          return false;
1746       }
1747    } else {
1748       return false;
1749    }
1750 
1751    return isl_surf_init(dev, ccs_surf,
1752                         .dim = surf->dim,
1753                         .format = ccs_format,
1754                         .width = surf->logical_level0_px.width,
1755                         .height = surf->logical_level0_px.height,
1756                         .depth = surf->logical_level0_px.depth,
1757                         .levels = surf->levels,
1758                         .array_len = surf->logical_level0_px.array_len,
1759                         .samples = 1,
1760                         .row_pitch = row_pitch,
1761                         .usage = ISL_SURF_USAGE_CCS_BIT,
1762                         .tiling_flags = ISL_TILING_CCS_BIT);
1763 }
1764 
/**
 * Call the per-generation implementation of @a func for device @a dev.
 *
 * Expands to a switch on ISL_DEV_GEN(dev) that invokes isl_gen<N>_<func> with
 * the remaining arguments.  Generation 4 devices for which ISL_DEV_IS_G4X()
 * is true use the gen5 variant, and generation 7 devices for which
 * ISL_DEV_IS_HASWELL() is true use the gen75 variant.  An unknown generation
 * trips an assertion.
 *
 * The expansion is wrapped in do { } while (0) so that it forms exactly one
 * statement: it must be followed by a semicolon and is safe to use as the
 * body of an unbraced if/else.
 */
#define isl_genX_call(dev, func, ...)                 \
   do {                                               \
      switch (ISL_DEV_GEN(dev)) {                     \
      case 4:                                         \
         /* G45 surface state is the same as gen5 */  \
         if (ISL_DEV_IS_G4X(dev)) {                   \
            isl_gen5_##func(__VA_ARGS__);             \
         } else {                                     \
            isl_gen4_##func(__VA_ARGS__);             \
         }                                            \
         break;                                       \
      case 5:                                         \
         isl_gen5_##func(__VA_ARGS__);                \
         break;                                       \
      case 6:                                         \
         isl_gen6_##func(__VA_ARGS__);                \
         break;                                       \
      case 7:                                         \
         if (ISL_DEV_IS_HASWELL(dev)) {               \
            isl_gen75_##func(__VA_ARGS__);            \
         } else {                                     \
            isl_gen7_##func(__VA_ARGS__);             \
         }                                            \
         break;                                       \
      case 8:                                         \
         isl_gen8_##func(__VA_ARGS__);                \
         break;                                       \
      case 9:                                         \
         isl_gen9_##func(__VA_ARGS__);                \
         break;                                       \
      case 10:                                        \
         isl_gen10_##func(__VA_ARGS__);               \
         break;                                       \
      default:                                        \
         assert(!"Unknown hardware generation");      \
      }                                               \
   } while (0)
1800 
/**
 * Fill @a state from the surface and view described by @a info, using the
 * surf_fill_state_s implementation that matches the hardware generation of
 * @a dev.
 *
 * In builds with assertions enabled the view is validated first: exactly one
 * of the render-target, texture, or storage usage bits must be set, the cube
 * bit is the only other usage bit allowed, and the view's array range must
 * fit within the surface (its depth for 3D surfaces, its array length
 * otherwise).
 */
void
isl_surf_fill_state_s(const struct isl_device *dev, void *state,
                      const struct isl_surf_fill_state_info *restrict info)
{
#ifndef NDEBUG
   isl_surf_usage_flags_t _base_usage =
      info->view->usage & (ISL_SURF_USAGE_RENDER_TARGET_BIT |
                           ISL_SURF_USAGE_TEXTURE_BIT |
                           ISL_SURF_USAGE_STORAGE_BIT);
   /* They may only specify one of the above bits at a time */
   assert(__builtin_popcount(_base_usage) == 1);
   /* The only other allowed bit is ISL_SURF_USAGE_CUBE_BIT */
   assert((info->view->usage & ~ISL_SURF_USAGE_CUBE_BIT) == _base_usage);
#endif

   /* The layers addressed by the view must exist in the surface: 3D surfaces
    * are bounded by their depth, everything else by the array length.
    */
   if (info->surf->dim == ISL_SURF_DIM_3D) {
      assert(info->view->base_array_layer + info->view->array_len <=
             info->surf->logical_level0_px.depth);
   } else {
      assert(info->view->base_array_layer + info->view->array_len <=
             info->surf->logical_level0_px.array_len);
   }

   isl_genX_call(dev, surf_fill_state_s, dev, state, info);
}
1826 
/**
 * Fill @a state from the buffer description in @a info by calling the
 * buffer_fill_state_s implementation that matches the hardware generation of
 * @a dev.  @a dev is used only to select the implementation; it is not
 * forwarded to it.
 */
void
isl_buffer_fill_state_s(const struct isl_device *dev, void *state,
                        const struct isl_buffer_fill_state_info *restrict info)
{
   isl_genX_call(dev, buffer_fill_state_s, state, info);
}
1833 
/**
 * Fill @a state by calling the null_fill_state implementation that matches
 * the hardware generation of @a dev, passing it @a size.  @a dev is used only
 * to select the implementation; it is not forwarded to it.
 */
void
isl_null_fill_state(const struct isl_device *dev, void *state,
                    struct isl_extent3d size)
{
   isl_genX_call(dev, null_fill_state, state, size);
}
1840 
/**
 * Validate @a info with assertions and then call the
 * emit_depth_stencil_hiz_s implementation that matches the hardware
 * generation of @a dev, passing it @a dev, @a batch, and @a info.
 *
 * Either of info->depth_surf and info->stencil_surf may be NULL; each check
 * below only runs for the surfaces that are present.
 */
void
isl_emit_depth_stencil_hiz_s(const struct isl_device *dev, void *batch,
                             const struct isl_depth_stencil_hiz_emit_info *restrict info)
{
   if (info->depth_surf && info->stencil_surf) {
      /* Without HiZ and separate stencil, depth and stencil must be the very
       * same surface at the very same address.
       */
      if (!dev->info->has_hiz_and_separate_stencil) {
         assert(info->depth_surf == info->stencil_surf);
         assert(info->depth_address == info->stencil_address);
      }
      assert(info->depth_surf->dim == info->stencil_surf->dim);
   }

   if (info->depth_surf) {
      assert((info->depth_surf->usage & ISL_SURF_USAGE_DEPTH_BIT));
      /* The view's layer range must fit in the depth surface: bounded by the
       * depth for 3D surfaces and by the array length otherwise.
       */
      if (info->depth_surf->dim == ISL_SURF_DIM_3D) {
         assert(info->view->base_array_layer + info->view->array_len <=
                info->depth_surf->logical_level0_px.depth);
      } else {
         assert(info->view->base_array_layer + info->view->array_len <=
                info->depth_surf->logical_level0_px.array_len);
      }
   }

   if (info->stencil_surf) {
      assert((info->stencil_surf->usage & ISL_SURF_USAGE_STENCIL_BIT));
      /* Same layer-range check as above, against the stencil surface. */
      if (info->stencil_surf->dim == ISL_SURF_DIM_3D) {
         assert(info->view->base_array_layer + info->view->array_len <=
                info->stencil_surf->logical_level0_px.depth);
      } else {
         assert(info->view->base_array_layer + info->view->array_len <=
                info->stencil_surf->logical_level0_px.array_len);
      }
   }

   isl_genX_call(dev, emit_depth_stencil_hiz_s, dev, batch, info);
}
1877 
1878 /**
1879  * A variant of isl_surf_get_image_offset_sa() specific to
1880  * ISL_DIM_LAYOUT_GEN4_2D.
1881  */
1882 static void
get_image_offset_sa_gen4_2d(const struct isl_surf * surf,uint32_t level,uint32_t logical_array_layer,uint32_t * x_offset_sa,uint32_t * y_offset_sa)1883 get_image_offset_sa_gen4_2d(const struct isl_surf *surf,
1884                             uint32_t level, uint32_t logical_array_layer,
1885                             uint32_t *x_offset_sa,
1886                             uint32_t *y_offset_sa)
1887 {
1888    assert(level < surf->levels);
1889    if (surf->dim == ISL_SURF_DIM_3D)
1890       assert(logical_array_layer < surf->logical_level0_px.depth);
1891    else
1892       assert(logical_array_layer < surf->logical_level0_px.array_len);
1893 
1894    const struct isl_extent3d image_align_sa =
1895       isl_surf_get_image_alignment_sa(surf);
1896 
1897    const uint32_t W0 = surf->phys_level0_sa.width;
1898    const uint32_t H0 = surf->phys_level0_sa.height;
1899 
1900    const uint32_t phys_layer = logical_array_layer *
1901       (surf->msaa_layout == ISL_MSAA_LAYOUT_ARRAY ? surf->samples : 1);
1902 
1903    uint32_t x = 0;
1904    uint32_t y = phys_layer * isl_surf_get_array_pitch_sa_rows(surf);
1905 
1906    for (uint32_t l = 0; l < level; ++l) {
1907       if (l == 1) {
1908          uint32_t W = isl_minify(W0, l);
1909          x += isl_align_npot(W, image_align_sa.w);
1910       } else {
1911          uint32_t H = isl_minify(H0, l);
1912          y += isl_align_npot(H, image_align_sa.h);
1913       }
1914    }
1915 
1916    *x_offset_sa = x;
1917    *y_offset_sa = y;
1918 }
1919 
1920 /**
1921  * A variant of isl_surf_get_image_offset_sa() specific to
1922  * ISL_DIM_LAYOUT_GEN4_3D.
1923  */
1924 static void
get_image_offset_sa_gen4_3d(const struct isl_surf * surf,uint32_t level,uint32_t logical_z_offset_px,uint32_t * x_offset_sa,uint32_t * y_offset_sa)1925 get_image_offset_sa_gen4_3d(const struct isl_surf *surf,
1926                             uint32_t level, uint32_t logical_z_offset_px,
1927                             uint32_t *x_offset_sa,
1928                             uint32_t *y_offset_sa)
1929 {
1930    assert(level < surf->levels);
1931    if (surf->dim == ISL_SURF_DIM_3D) {
1932       assert(surf->phys_level0_sa.array_len == 1);
1933       assert(logical_z_offset_px < isl_minify(surf->phys_level0_sa.depth, level));
1934    } else {
1935       assert(surf->dim == ISL_SURF_DIM_2D);
1936       assert(surf->usage & ISL_SURF_USAGE_CUBE_BIT);
1937       assert(surf->phys_level0_sa.array_len == 6);
1938       assert(logical_z_offset_px < surf->phys_level0_sa.array_len);
1939    }
1940 
1941    const struct isl_extent3d image_align_sa =
1942       isl_surf_get_image_alignment_sa(surf);
1943 
1944    const uint32_t W0 = surf->phys_level0_sa.width;
1945    const uint32_t H0 = surf->phys_level0_sa.height;
1946    const uint32_t D0 = surf->phys_level0_sa.depth;
1947    const uint32_t AL = surf->phys_level0_sa.array_len;
1948 
1949    uint32_t x = 0;
1950    uint32_t y = 0;
1951 
1952    for (uint32_t l = 0; l < level; ++l) {
1953       const uint32_t level_h = isl_align_npot(isl_minify(H0, l), image_align_sa.h);
1954       const uint32_t level_d =
1955          isl_align_npot(surf->dim == ISL_SURF_DIM_3D ? isl_minify(D0, l) : AL,
1956                         image_align_sa.d);
1957       const uint32_t max_layers_vert = isl_align(level_d, 1u << l) / (1u << l);
1958 
1959       y += level_h * max_layers_vert;
1960    }
1961 
1962    const uint32_t level_w = isl_align_npot(isl_minify(W0, level), image_align_sa.w);
1963    const uint32_t level_h = isl_align_npot(isl_minify(H0, level), image_align_sa.h);
1964    const uint32_t level_d =
1965       isl_align_npot(surf->dim == ISL_SURF_DIM_3D ? isl_minify(D0, level) : AL,
1966                      image_align_sa.d);
1967 
1968    const uint32_t max_layers_horiz = MIN(level_d, 1u << level);
1969 
1970    x += level_w * (logical_z_offset_px % max_layers_horiz);
1971    y += level_h * (logical_z_offset_px / max_layers_horiz);
1972 
1973    *x_offset_sa = x;
1974    *y_offset_sa = y;
1975 }
1976 
1977 static void
get_image_offset_sa_gen6_stencil_hiz(const struct isl_surf * surf,uint32_t level,uint32_t logical_array_layer,uint32_t * x_offset_sa,uint32_t * y_offset_sa)1978 get_image_offset_sa_gen6_stencil_hiz(const struct isl_surf *surf,
1979                                      uint32_t level,
1980                                      uint32_t logical_array_layer,
1981                                      uint32_t *x_offset_sa,
1982                                      uint32_t *y_offset_sa)
1983 {
1984    assert(level < surf->levels);
1985    assert(surf->logical_level0_px.depth == 1);
1986    assert(logical_array_layer < surf->logical_level0_px.array_len);
1987 
1988    const struct isl_format_layout *fmtl = isl_format_get_layout(surf->format);
1989 
1990    const struct isl_extent3d image_align_sa =
1991       isl_surf_get_image_alignment_sa(surf);
1992 
1993    struct isl_tile_info tile_info;
1994    isl_tiling_get_info(surf->tiling, fmtl->bpb, &tile_info);
1995    const struct isl_extent2d tile_extent_sa = {
1996       .w = tile_info.logical_extent_el.w * fmtl->bw,
1997       .h = tile_info.logical_extent_el.h * fmtl->bh,
1998    };
1999    /* Tile size is a multiple of image alignment */
2000    assert(tile_extent_sa.w % image_align_sa.w == 0);
2001    assert(tile_extent_sa.h % image_align_sa.h == 0);
2002 
2003    const uint32_t W0 = surf->phys_level0_sa.w;
2004    const uint32_t H0 = surf->phys_level0_sa.h;
2005 
2006    /* Each image has the same height as LOD0 because the hardware thinks
2007     * everything is LOD0
2008     */
2009    const uint32_t H = isl_align(H0, image_align_sa.h);
2010 
2011    /* Quick sanity check for consistency */
2012    if (surf->phys_level0_sa.array_len > 1)
2013       assert(surf->array_pitch_el_rows == isl_assert_div(H, fmtl->bh));
2014 
2015    uint32_t x = 0, y = 0;
2016    for (uint32_t l = 0; l < level; ++l) {
2017       const uint32_t W = isl_minify(W0, l);
2018 
2019       const uint32_t w = isl_align(W, tile_extent_sa.w);
2020       const uint32_t h = isl_align(H * surf->phys_level0_sa.a,
2021                                    tile_extent_sa.h);
2022 
2023       if (l == 0) {
2024          y += h;
2025       } else {
2026          x += w;
2027       }
2028    }
2029 
2030    y += H * logical_array_layer;
2031 
2032    *x_offset_sa = x;
2033    *y_offset_sa = y;
2034 }
2035 
2036 /**
2037  * A variant of isl_surf_get_image_offset_sa() specific to
2038  * ISL_DIM_LAYOUT_GEN9_1D.
2039  */
2040 static void
get_image_offset_sa_gen9_1d(const struct isl_surf * surf,uint32_t level,uint32_t layer,uint32_t * x_offset_sa,uint32_t * y_offset_sa)2041 get_image_offset_sa_gen9_1d(const struct isl_surf *surf,
2042                             uint32_t level, uint32_t layer,
2043                             uint32_t *x_offset_sa,
2044                             uint32_t *y_offset_sa)
2045 {
2046    assert(level < surf->levels);
2047    assert(layer < surf->phys_level0_sa.array_len);
2048    assert(surf->phys_level0_sa.height == 1);
2049    assert(surf->phys_level0_sa.depth == 1);
2050    assert(surf->samples == 1);
2051 
2052    const uint32_t W0 = surf->phys_level0_sa.width;
2053    const struct isl_extent3d image_align_sa =
2054       isl_surf_get_image_alignment_sa(surf);
2055 
2056    uint32_t x = 0;
2057 
2058    for (uint32_t l = 0; l < level; ++l) {
2059       uint32_t W = isl_minify(W0, l);
2060       uint32_t w = isl_align_npot(W, image_align_sa.w);
2061 
2062       x += w;
2063    }
2064 
2065    *x_offset_sa = x;
2066    *y_offset_sa = layer * isl_surf_get_array_pitch_sa_rows(surf);
2067 }
2068 
2069 /**
2070  * Calculate the offset, in units of surface samples, to a subimage in the
2071  * surface.
2072  *
2073  * @invariant level < surface levels
2074  * @invariant logical_array_layer < logical array length of surface
2075  * @invariant logical_z_offset_px < logical depth of surface at level
2076  */
2077 void
isl_surf_get_image_offset_sa(const struct isl_surf * surf,uint32_t level,uint32_t logical_array_layer,uint32_t logical_z_offset_px,uint32_t * x_offset_sa,uint32_t * y_offset_sa)2078 isl_surf_get_image_offset_sa(const struct isl_surf *surf,
2079                              uint32_t level,
2080                              uint32_t logical_array_layer,
2081                              uint32_t logical_z_offset_px,
2082                              uint32_t *x_offset_sa,
2083                              uint32_t *y_offset_sa)
2084 {
2085    assert(level < surf->levels);
2086    assert(logical_array_layer < surf->logical_level0_px.array_len);
2087    assert(logical_z_offset_px
2088           < isl_minify(surf->logical_level0_px.depth, level));
2089 
2090    switch (surf->dim_layout) {
2091    case ISL_DIM_LAYOUT_GEN9_1D:
2092       get_image_offset_sa_gen9_1d(surf, level, logical_array_layer,
2093                                   x_offset_sa, y_offset_sa);
2094       break;
2095    case ISL_DIM_LAYOUT_GEN4_2D:
2096       get_image_offset_sa_gen4_2d(surf, level, logical_array_layer
2097                                   + logical_z_offset_px,
2098                                   x_offset_sa, y_offset_sa);
2099       break;
2100    case ISL_DIM_LAYOUT_GEN4_3D:
2101       get_image_offset_sa_gen4_3d(surf, level, logical_array_layer +
2102                                   logical_z_offset_px,
2103                                   x_offset_sa, y_offset_sa);
2104       break;
2105    case ISL_DIM_LAYOUT_GEN6_STENCIL_HIZ:
2106       get_image_offset_sa_gen6_stencil_hiz(surf, level, logical_array_layer +
2107                                            logical_z_offset_px,
2108                                            x_offset_sa, y_offset_sa);
2109       break;
2110 
2111    default:
2112       unreachable("not reached");
2113    }
2114 }
2115 
2116 void
isl_surf_get_image_offset_el(const struct isl_surf * surf,uint32_t level,uint32_t logical_array_layer,uint32_t logical_z_offset_px,uint32_t * x_offset_el,uint32_t * y_offset_el)2117 isl_surf_get_image_offset_el(const struct isl_surf *surf,
2118                              uint32_t level,
2119                              uint32_t logical_array_layer,
2120                              uint32_t logical_z_offset_px,
2121                              uint32_t *x_offset_el,
2122                              uint32_t *y_offset_el)
2123 {
2124    const struct isl_format_layout *fmtl = isl_format_get_layout(surf->format);
2125 
2126    assert(level < surf->levels);
2127    assert(logical_array_layer < surf->logical_level0_px.array_len);
2128    assert(logical_z_offset_px
2129           < isl_minify(surf->logical_level0_px.depth, level));
2130 
2131    uint32_t x_offset_sa, y_offset_sa;
2132    isl_surf_get_image_offset_sa(surf, level,
2133                                 logical_array_layer,
2134                                 logical_z_offset_px,
2135                                 &x_offset_sa,
2136                                 &y_offset_sa);
2137 
2138    *x_offset_el = x_offset_sa / fmtl->bw;
2139    *y_offset_el = y_offset_sa / fmtl->bh;
2140 }
2141 
2142 void
isl_surf_get_image_offset_B_tile_sa(const struct isl_surf * surf,uint32_t level,uint32_t logical_array_layer,uint32_t logical_z_offset_px,uint32_t * offset_B,uint32_t * x_offset_sa,uint32_t * y_offset_sa)2143 isl_surf_get_image_offset_B_tile_sa(const struct isl_surf *surf,
2144                                     uint32_t level,
2145                                     uint32_t logical_array_layer,
2146                                     uint32_t logical_z_offset_px,
2147                                     uint32_t *offset_B,
2148                                     uint32_t *x_offset_sa,
2149                                     uint32_t *y_offset_sa)
2150 {
2151    const struct isl_format_layout *fmtl = isl_format_get_layout(surf->format);
2152 
2153    uint32_t total_x_offset_el, total_y_offset_el;
2154    isl_surf_get_image_offset_el(surf, level, logical_array_layer,
2155                                 logical_z_offset_px,
2156                                 &total_x_offset_el,
2157                                 &total_y_offset_el);
2158 
2159    uint32_t x_offset_el, y_offset_el;
2160    isl_tiling_get_intratile_offset_el(surf->tiling, fmtl->bpb,
2161                                       surf->row_pitch,
2162                                       total_x_offset_el,
2163                                       total_y_offset_el,
2164                                       offset_B,
2165                                       &x_offset_el,
2166                                       &y_offset_el);
2167 
2168    if (x_offset_sa) {
2169       *x_offset_sa = x_offset_el * fmtl->bw;
2170    } else {
2171       assert(x_offset_el == 0);
2172    }
2173 
2174    if (y_offset_sa) {
2175       *y_offset_sa = y_offset_el * fmtl->bh;
2176    } else {
2177       assert(y_offset_el == 0);
2178    }
2179 }
2180 
2181 void
isl_surf_get_image_surf(const struct isl_device * dev,const struct isl_surf * surf,uint32_t level,uint32_t logical_array_layer,uint32_t logical_z_offset_px,struct isl_surf * image_surf,uint32_t * offset_B,uint32_t * x_offset_sa,uint32_t * y_offset_sa)2182 isl_surf_get_image_surf(const struct isl_device *dev,
2183                         const struct isl_surf *surf,
2184                         uint32_t level,
2185                         uint32_t logical_array_layer,
2186                         uint32_t logical_z_offset_px,
2187                         struct isl_surf *image_surf,
2188                         uint32_t *offset_B,
2189                         uint32_t *x_offset_sa,
2190                         uint32_t *y_offset_sa)
2191 {
2192    isl_surf_get_image_offset_B_tile_sa(surf,
2193                                        level,
2194                                        logical_array_layer,
2195                                        logical_z_offset_px,
2196                                        offset_B,
2197                                        x_offset_sa,
2198                                        y_offset_sa);
2199 
2200    /* Even for cube maps there will be only single face, therefore drop the
2201     * corresponding flag if present.
2202     */
2203    const isl_surf_usage_flags_t usage =
2204       surf->usage & (~ISL_SURF_USAGE_CUBE_BIT);
2205 
2206    bool ok UNUSED;
2207    ok = isl_surf_init(dev, image_surf,
2208                       .dim = ISL_SURF_DIM_2D,
2209                       .format = surf->format,
2210                       .width = isl_minify(surf->logical_level0_px.w, level),
2211                       .height = isl_minify(surf->logical_level0_px.h, level),
2212                       .depth = 1,
2213                       .levels = 1,
2214                       .array_len = 1,
2215                       .samples = surf->samples,
2216                       .row_pitch = surf->row_pitch,
2217                       .usage = usage,
2218                       .tiling_flags = (1 << surf->tiling));
2219    assert(ok);
2220 }
2221 
2222 void
isl_tiling_get_intratile_offset_el(enum isl_tiling tiling,uint32_t bpb,uint32_t row_pitch,uint32_t total_x_offset_el,uint32_t total_y_offset_el,uint32_t * base_address_offset,uint32_t * x_offset_el,uint32_t * y_offset_el)2223 isl_tiling_get_intratile_offset_el(enum isl_tiling tiling,
2224                                    uint32_t bpb,
2225                                    uint32_t row_pitch,
2226                                    uint32_t total_x_offset_el,
2227                                    uint32_t total_y_offset_el,
2228                                    uint32_t *base_address_offset,
2229                                    uint32_t *x_offset_el,
2230                                    uint32_t *y_offset_el)
2231 {
2232    if (tiling == ISL_TILING_LINEAR) {
2233       assert(bpb % 8 == 0);
2234       *base_address_offset = total_y_offset_el * row_pitch +
2235                              total_x_offset_el * (bpb / 8);
2236       *x_offset_el = 0;
2237       *y_offset_el = 0;
2238       return;
2239    }
2240 
2241    struct isl_tile_info tile_info;
2242    isl_tiling_get_info(tiling, bpb, &tile_info);
2243 
2244    assert(row_pitch % tile_info.phys_extent_B.width == 0);
2245 
2246    /* For non-power-of-two formats, we need the address to be both tile and
2247     * element-aligned.  The easiest way to achieve this is to work with a tile
2248     * that is three times as wide as the regular tile.
2249     *
2250     * The tile info returned by get_tile_info has a logical size that is an
2251     * integer number of tile_info.format_bpb size elements.  To scale the
2252     * tile, we scale up the physical width and then treat the logical tile
2253     * size as if it has bpb size elements.
2254     */
2255    const uint32_t tile_el_scale = bpb / tile_info.format_bpb;
2256    tile_info.phys_extent_B.width *= tile_el_scale;
2257 
2258    /* Compute the offset into the tile */
2259    *x_offset_el = total_x_offset_el % tile_info.logical_extent_el.w;
2260    *y_offset_el = total_y_offset_el % tile_info.logical_extent_el.h;
2261 
2262    /* Compute the offset of the tile in units of whole tiles */
2263    uint32_t x_offset_tl = total_x_offset_el / tile_info.logical_extent_el.w;
2264    uint32_t y_offset_tl = total_y_offset_el / tile_info.logical_extent_el.h;
2265 
2266    *base_address_offset =
2267       y_offset_tl * tile_info.phys_extent_B.h * row_pitch +
2268       x_offset_tl * tile_info.phys_extent_B.h * tile_info.phys_extent_B.w;
2269 }
2270 
2271 uint32_t
isl_surf_get_depth_format(const struct isl_device * dev,const struct isl_surf * surf)2272 isl_surf_get_depth_format(const struct isl_device *dev,
2273                           const struct isl_surf *surf)
2274 {
2275    /* Support for separate stencil buffers began in gen5. Support for
2276     * interleaved depthstencil buffers ceased in gen7. The intermediate gens,
2277     * those that supported separate and interleaved stencil, were gen5 and
2278     * gen6.
2279     *
2280     * For a list of all available formats, see the Sandybridge PRM >> Volume
2281     * 2 Part 1: 3D/Media - 3D Pipeline >> 3DSTATE_DEPTH_BUFFER >> Surface
2282     * Format (p321).
2283     */
2284 
2285    bool has_stencil = surf->usage & ISL_SURF_USAGE_STENCIL_BIT;
2286 
2287    assert(surf->usage & ISL_SURF_USAGE_DEPTH_BIT);
2288 
2289    if (has_stencil)
2290       assert(ISL_DEV_GEN(dev) < 7);
2291 
2292    switch (surf->format) {
2293    default:
2294       unreachable("bad isl depth format");
2295    case ISL_FORMAT_R32_FLOAT_X8X24_TYPELESS:
2296       assert(ISL_DEV_GEN(dev) < 7);
2297       return 0; /* D32_FLOAT_S8X24_UINT */
2298    case ISL_FORMAT_R32_FLOAT:
2299       assert(!has_stencil);
2300       return 1; /* D32_FLOAT */
2301    case ISL_FORMAT_R24_UNORM_X8_TYPELESS:
2302       if (has_stencil) {
2303          assert(ISL_DEV_GEN(dev) < 7);
2304          return 2; /* D24_UNORM_S8_UINT */
2305       } else {
2306          assert(ISL_DEV_GEN(dev) >= 5);
2307          return 3; /* D24_UNORM_X8_UINT */
2308       }
2309    case ISL_FORMAT_R16_UNORM:
2310       assert(!has_stencil);
2311       return 5; /* D16_UNORM */
2312    }
2313 }
2314