1 /*
2  * Copyright 2008 Ben Skeggs
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  */
22 
23 #include "nvc0/nvc0_context.h"
24 #include "nvc0/nvc0_resource.h"
25 #include "nvc0/gm107_texture.xml.h"
26 #include "nvc0/nvc0_compute.xml.h"
27 #include "nv50/g80_texture.xml.h"
28 #include "nv50/g80_defs.xml.h"
29 
30 #include "util/format/u_format.h"
31 
32 #define NVE4_TIC_ENTRY_INVALID 0x000fffff
33 #define NVE4_TSC_ENTRY_INVALID 0xfff00000
34 
35 static inline uint32_t
nv50_tic_swizzle(const struct nvc0_format * fmt,unsigned swz,bool tex_int)36 nv50_tic_swizzle(const struct nvc0_format *fmt, unsigned swz, bool tex_int)
37 {
38    switch (swz) {
39    case PIPE_SWIZZLE_X  : return fmt->tic.src_x;
40    case PIPE_SWIZZLE_Y: return fmt->tic.src_y;
41    case PIPE_SWIZZLE_Z : return fmt->tic.src_z;
42    case PIPE_SWIZZLE_W: return fmt->tic.src_w;
43    case PIPE_SWIZZLE_1:
44       return tex_int ? G80_TIC_SOURCE_ONE_INT : G80_TIC_SOURCE_ONE_FLOAT;
45    case PIPE_SWIZZLE_0:
46    default:
47       return G80_TIC_SOURCE_ZERO;
48    }
49 }
50 
51 struct pipe_sampler_view *
nvc0_create_sampler_view(struct pipe_context * pipe,struct pipe_resource * res,const struct pipe_sampler_view * templ)52 nvc0_create_sampler_view(struct pipe_context *pipe,
53                          struct pipe_resource *res,
54                          const struct pipe_sampler_view *templ)
55 {
56    uint32_t flags = 0;
57 
58    if (templ->target == PIPE_TEXTURE_RECT || templ->target == PIPE_BUFFER)
59       flags |= NV50_TEXVIEW_SCALED_COORDS;
60 
61    return nvc0_create_texture_view(pipe, res, templ, flags, templ->target);
62 }
63 
/* Build a GM107+ (Maxwell) texture image control (TIC) header for a
 * sampler view.  The eight 32-bit TIC words describe format, per-channel
 * swizzle, storage layout, address and dimensions; they are uploaded
 * into the texture header pool later, by the validation code.
 */
static struct pipe_sampler_view *
gm107_create_texture_view(struct pipe_context *pipe,
                          struct pipe_resource *texture,
                          const struct pipe_sampler_view *templ,
                          uint32_t flags,
                          enum pipe_texture_target target)
{
   const struct util_format_description *desc;
   const struct nvc0_format *fmt;
   uint64_t address;
   uint32_t *tic;
   uint32_t swz[4];
   uint32_t width, height;
   uint32_t depth;
   struct nv50_tic_entry *view;
   struct nv50_miptree *mt;
   bool tex_int;

   view = MALLOC_STRUCT(nv50_tic_entry);
   if (!view)
      return NULL;
   mt = nv50_miptree(texture);

   view->pipe = *templ;
   view->pipe.reference.count = 1;
   view->pipe.texture = NULL;
   view->pipe.context = pipe;

   /* No TIC pool slot yet; one is assigned on first validation. */
   view->id = -1;
   view->bindless = 0;

   pipe_resource_reference(&view->pipe.texture, texture);

   tic = &view->tic[0];

   desc = util_format_description(view->pipe.format);
   tex_int = util_format_is_pure_integer(view->pipe.format);

   /* Word 0: component sizes, per-channel data types, swizzle sources. */
   fmt = &nvc0_format_table[view->pipe.format];
   swz[0] = nv50_tic_swizzle(fmt, view->pipe.swizzle_r, tex_int);
   swz[1] = nv50_tic_swizzle(fmt, view->pipe.swizzle_g, tex_int);
   swz[2] = nv50_tic_swizzle(fmt, view->pipe.swizzle_b, tex_int);
   swz[3] = nv50_tic_swizzle(fmt, view->pipe.swizzle_a, tex_int);

   tic[0]  = fmt->tic.format << GM107_TIC2_0_COMPONENTS_SIZES__SHIFT;
   tic[0] |= fmt->tic.type_r << GM107_TIC2_0_R_DATA_TYPE__SHIFT;
   tic[0] |= fmt->tic.type_g << GM107_TIC2_0_G_DATA_TYPE__SHIFT;
   tic[0] |= fmt->tic.type_b << GM107_TIC2_0_B_DATA_TYPE__SHIFT;
   tic[0] |= fmt->tic.type_a << GM107_TIC2_0_A_DATA_TYPE__SHIFT;
   tic[0] |= swz[0] << GM107_TIC2_0_X_SOURCE__SHIFT;
   tic[0] |= swz[1] << GM107_TIC2_0_Y_SOURCE__SHIFT;
   tic[0] |= swz[2] << GM107_TIC2_0_Z_SOURCE__SHIFT;
   tic[0] |= swz[3] << GM107_TIC2_0_W_SOURCE__SHIFT;

   address = mt->base.address;

   tic[3]  = GM107_TIC2_3_LOD_ANISO_QUALITY_2;
   tic[4]  = GM107_TIC2_4_SECTOR_PROMOTION_PROMOTE_TO_2_V;
   tic[4] |= GM107_TIC2_4_BORDER_SIZE_SAMPLER_COLOR;

   if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB)
      tic[4] |= GM107_TIC2_4_SRGB_CONVERSION;

   if (!(flags & NV50_TEXVIEW_SCALED_COORDS))
      tic[5] = GM107_TIC2_5_NORMALIZED_COORDS;
   else
      tic[5] = 0;

   /* check for linear storage type */
   if (unlikely(!nouveau_bo_memtype(nv04_resource(texture)->bo))) {
      if (texture->target == PIPE_BUFFER) {
         /* Buffer texture: width (in texels, minus one) is split across
          * words 3 (high 16 bits) and 4 (low 16 bits). */
         assert(!(tic[5] & GM107_TIC2_5_NORMALIZED_COORDS));
         width = view->pipe.u.buf.size / (desc->block.bits / 8) - 1;
         address +=
            view->pipe.u.buf.offset;
         tic[2]  = GM107_TIC2_2_HEADER_VERSION_ONE_D_BUFFER;
         tic[3] |= width >> 16;
         tic[4] |= GM107_TIC2_4_TEXTURE_TYPE_ONE_D_BUFFER;
         tic[4] |= width & 0xffff;
      } else {
         /* Pitch-linear surfaces must be 32-byte aligned. */
         assert(!(mt->level[0].pitch & 0x1f));
         /* must be 2D texture without mip maps */
         tic[2]  = GM107_TIC2_2_HEADER_VERSION_PITCH;
         tic[4] |= GM107_TIC2_4_TEXTURE_TYPE_TWO_D_NO_MIPMAP;
         tic[3] |= mt->level[0].pitch >> 5;
         tic[4] |= mt->base.base.width0 - 1;
         tic[5] |= 0 << GM107_TIC2_5_DEPTH_MINUS_ONE__SHIFT;
         tic[5] |= mt->base.base.height0 - 1;
      }
      tic[1]  = address;
      tic[2] |= address >> 32;
      tic[6]  = 0;
      tic[7]  = 0;
      return &view->pipe;
   }

   /* Tiled (blocklinear) storage: encode the tile dimensions from the
    * base level's tile_mode into word 3. */
   tic[2]  = GM107_TIC2_2_HEADER_VERSION_BLOCKLINEAR;
   tic[3] |=
      ((mt->level[0].tile_mode & 0x0f0) >> 4 << 3) |
      ((mt->level[0].tile_mode & 0xf00) >> 8 << 6);

   depth = MAX2(mt->base.base.array_size, mt->base.base.depth0);

   if (mt->base.base.array_size > 1) {
      /* there doesn't seem to be a base layer field in TIC */
      address += view->pipe.u.tex.first_layer * mt->layer_stride;
      depth = view->pipe.u.tex.last_layer - view->pipe.u.tex.first_layer + 1;
   }
   tic[1]  = address;
   tic[2] |= address >> 32;

   switch (target) {
   case PIPE_TEXTURE_1D:
      tic[4] |= GM107_TIC2_4_TEXTURE_TYPE_ONE_D;
      break;
   case PIPE_TEXTURE_2D:
      tic[4] |= GM107_TIC2_4_TEXTURE_TYPE_TWO_D;
      break;
   case PIPE_TEXTURE_RECT:
      tic[4] |= GM107_TIC2_4_TEXTURE_TYPE_TWO_D;
      break;
   case PIPE_TEXTURE_3D:
      tic[4] |= GM107_TIC2_4_TEXTURE_TYPE_THREE_D;
      break;
   case PIPE_TEXTURE_CUBE:
      /* depth counts layers; a cube is 6 layers per face set */
      depth /= 6;
      tic[4] |= GM107_TIC2_4_TEXTURE_TYPE_CUBEMAP;
      break;
   case PIPE_TEXTURE_1D_ARRAY:
      tic[4] |= GM107_TIC2_4_TEXTURE_TYPE_ONE_D_ARRAY;
      break;
   case PIPE_TEXTURE_2D_ARRAY:
      tic[4] |= GM107_TIC2_4_TEXTURE_TYPE_TWO_D_ARRAY;
      break;
   case PIPE_TEXTURE_CUBE_ARRAY:
      depth /= 6;
      tic[4] |= GM107_TIC2_4_TEXTURE_TYPE_CUBE_ARRAY;
      break;
   default:
      unreachable("unexpected/invalid texture target");
   }

   tic[3] |= (flags & NV50_TEXVIEW_FILTER_MSAA8) ?
             GM107_TIC2_3_USE_HEADER_OPT_CONTROL :
             GM107_TIC2_3_LOD_ANISO_QUALITY_HIGH |
             GM107_TIC2_3_LOD_ISO_QUALITY_HIGH;

   /* Resolve/image views address individual samples, so advertise the
    * un-downsampled (sample-scaled) dimensions. */
   if (flags & (NV50_TEXVIEW_ACCESS_RESOLVE | NV50_TEXVIEW_IMAGE_GM107)) {
      width = mt->base.base.width0 << mt->ms_x;
      height = mt->base.base.height0 << mt->ms_y;
   } else {
      width = mt->base.base.width0;
      height = mt->base.base.height0;
   }

   tic[4] |= width - 1;

   tic[5] |= (height - 1) & 0xffff;
   tic[5] |= (depth - 1) << GM107_TIC2_5_DEPTH_MINUS_ONE__SHIFT;
   tic[3] |= mt->base.base.last_level << GM107_TIC2_3_MAX_MIP_LEVEL__SHIFT;

   /* sampling points: (?) */
   if ((flags & NV50_TEXVIEW_ACCESS_RESOLVE) && mt->ms_x > 1) {
      tic[6]  = GM107_TIC2_6_ANISO_FINE_SPREAD_MODIFIER_CONST_TWO;
      tic[6] |= GM107_TIC2_6_MAX_ANISOTROPY_2_TO_1;
   } else {
      tic[6]  = GM107_TIC2_6_ANISO_FINE_SPREAD_FUNC_TWO;
      tic[6] |= GM107_TIC2_6_ANISO_COARSE_SPREAD_FUNC_ONE;
   }

   /* Word 7: mip level range and multisample mode. */
   tic[7]  = (view->pipe.u.tex.last_level << 4) | view->pipe.u.tex.first_level;
   tic[7] |= mt->ms_mode << GM107_TIC2_7_MULTI_SAMPLE_COUNT__SHIFT;

   return &view->pipe;
}
239 
240 struct pipe_sampler_view *
gm107_create_texture_view_from_image(struct pipe_context * pipe,const struct pipe_image_view * view)241 gm107_create_texture_view_from_image(struct pipe_context *pipe,
242                                      const struct pipe_image_view *view)
243 {
244    struct nv04_resource *res = nv04_resource(view->resource);
245    struct pipe_sampler_view templ = {};
246    enum pipe_texture_target target;
247    uint32_t flags = 0;
248 
249    if (!res)
250       return NULL;
251    target = res->base.target;
252 
253    if (target == PIPE_TEXTURE_CUBE || target == PIPE_TEXTURE_CUBE_ARRAY)
254       target = PIPE_TEXTURE_2D_ARRAY;
255 
256    templ.format = view->format;
257    templ.swizzle_r = PIPE_SWIZZLE_X;
258    templ.swizzle_g = PIPE_SWIZZLE_Y;
259    templ.swizzle_b = PIPE_SWIZZLE_Z;
260    templ.swizzle_a = PIPE_SWIZZLE_W;
261 
262    if (target == PIPE_BUFFER) {
263       templ.u.buf.offset = view->u.buf.offset;
264       templ.u.buf.size = view->u.buf.size;
265    } else {
266       templ.u.tex.first_layer = view->u.tex.first_layer;
267       templ.u.tex.last_layer = view->u.tex.last_layer;
268       templ.u.tex.first_level = templ.u.tex.last_level = view->u.tex.level;
269    }
270 
271    flags = NV50_TEXVIEW_SCALED_COORDS | NV50_TEXVIEW_IMAGE_GM107;
272 
273    return nvc0_create_texture_view(pipe, &res->base, &templ, flags, target);
274 }
275 
/* Build a Fermi/Kepler (GF100..GK110) texture image control (TIC) header
 * for a sampler view.  Same role as gm107_create_texture_view(), but the
 * pre-Maxwell TIC layout packs format, layout and dimensions differently.
 */
static struct pipe_sampler_view *
gf100_create_texture_view(struct pipe_context *pipe,
                          struct pipe_resource *texture,
                          const struct pipe_sampler_view *templ,
                          uint32_t flags,
                          enum pipe_texture_target target)
{
   const struct util_format_description *desc;
   const struct nvc0_format *fmt;
   uint64_t address;
   uint32_t *tic;
   uint32_t swz[4];
   uint32_t width, height;
   uint32_t depth;
   uint32_t tex_fmt;
   struct nv50_tic_entry *view;
   struct nv50_miptree *mt;
   bool tex_int;

   view = MALLOC_STRUCT(nv50_tic_entry);
   if (!view)
      return NULL;
   mt = nv50_miptree(texture);

   view->pipe = *templ;
   view->pipe.reference.count = 1;
   view->pipe.texture = NULL;
   view->pipe.context = pipe;

   /* No TIC pool slot yet; one is assigned on first validation. */
   view->id = -1;
   view->bindless = 0;

   pipe_resource_reference(&view->pipe.texture, texture);

   tic = &view->tic[0];

   desc = util_format_description(view->pipe.format);

   fmt = &nvc0_format_table[view->pipe.format];

   tex_int = util_format_is_pure_integer(view->pipe.format);
   /* Low 6 bits are the base format; bit 6 selects the GK20A extended
    * component-sizes encoding and is folded into word 0 below. */
   tex_fmt = fmt->tic.format & 0x3f;

   /* Word 0: format, per-channel data types and swizzle sources. */
   swz[0] = nv50_tic_swizzle(fmt, view->pipe.swizzle_r, tex_int);
   swz[1] = nv50_tic_swizzle(fmt, view->pipe.swizzle_g, tex_int);
   swz[2] = nv50_tic_swizzle(fmt, view->pipe.swizzle_b, tex_int);
   swz[3] = nv50_tic_swizzle(fmt, view->pipe.swizzle_a, tex_int);
   tic[0] = (tex_fmt << G80_TIC_0_COMPONENTS_SIZES__SHIFT) |
            (fmt->tic.type_r << G80_TIC_0_R_DATA_TYPE__SHIFT) |
            (fmt->tic.type_g << G80_TIC_0_G_DATA_TYPE__SHIFT) |
            (fmt->tic.type_b << G80_TIC_0_B_DATA_TYPE__SHIFT) |
            (fmt->tic.type_a << G80_TIC_0_A_DATA_TYPE__SHIFT) |
            (swz[0] << G80_TIC_0_X_SOURCE__SHIFT) |
            (swz[1] << G80_TIC_0_Y_SOURCE__SHIFT) |
            (swz[2] << G80_TIC_0_Z_SOURCE__SHIFT) |
            (swz[3] << G80_TIC_0_W_SOURCE__SHIFT) |
            ((fmt->tic.format & 0x40) << (GK20A_TIC_0_USE_COMPONENT_SIZES_EXTENDED__SHIFT - 6));

   address = mt->base.address;

   /* NOTE(review): 0x10001000 looks like default LOD/aniso quality bits —
    * exact meaning not derivable from this file. */
   tic[2] = 0x10001000 | G80_TIC_2_BORDER_SOURCE_COLOR;

   if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB)
      tic[2] |= G80_TIC_2_SRGB_CONVERSION;

   if (!(flags & NV50_TEXVIEW_SCALED_COORDS))
      tic[2] |= G80_TIC_2_NORMALIZED_COORDS;

   /* check for linear storage type */
   if (unlikely(!nouveau_bo_memtype(nv04_resource(texture)->bo))) {
      if (texture->target == PIPE_BUFFER) {
         assert(!(tic[2] & G80_TIC_2_NORMALIZED_COORDS));
         address +=
            view->pipe.u.buf.offset;
         tic[2] |= G80_TIC_2_LAYOUT_PITCH | G80_TIC_2_TEXTURE_TYPE_ONE_D_BUFFER;
         tic[3] = 0;
         tic[4] = /* width */
            view->pipe.u.buf.size / (desc->block.bits / 8);
         tic[5] = 0;
      } else {
         /* must be 2D texture without mip maps */
         tic[2] |= G80_TIC_2_LAYOUT_PITCH | G80_TIC_2_TEXTURE_TYPE_TWO_D_NO_MIPMAP;
         tic[3] = mt->level[0].pitch;
         tic[4] = mt->base.base.width0;
         tic[5] = (1 << 16) | mt->base.base.height0;
      }
      tic[6] =
      tic[7] = 0;
      tic[1] = address;
      tic[2] |= address >> 32;
      return &view->pipe;
   }

   /* Blocklinear storage: fold the base level's tile dimensions into
    * word 2. */
   tic[2] |=
      ((mt->level[0].tile_mode & 0x0f0) << (22 - 4)) |
      ((mt->level[0].tile_mode & 0xf00) << (25 - 8));

   depth = MAX2(mt->base.base.array_size, mt->base.base.depth0);

   if (mt->base.base.array_size > 1) {
      /* there doesn't seem to be a base layer field in TIC */
      address += view->pipe.u.tex.first_layer * mt->layer_stride;
      depth = view->pipe.u.tex.last_layer - view->pipe.u.tex.first_layer + 1;
   }
   tic[1] = address;
   tic[2] |= address >> 32;

   switch (target) {
   case PIPE_TEXTURE_1D:
      tic[2] |= G80_TIC_2_TEXTURE_TYPE_ONE_D;
      break;
   case PIPE_TEXTURE_2D:
      tic[2] |= G80_TIC_2_TEXTURE_TYPE_TWO_D;
      break;
   case PIPE_TEXTURE_RECT:
      tic[2] |= G80_TIC_2_TEXTURE_TYPE_TWO_D;
      break;
   case PIPE_TEXTURE_3D:
      tic[2] |= G80_TIC_2_TEXTURE_TYPE_THREE_D;
      break;
   case PIPE_TEXTURE_CUBE:
      /* depth counts layers; a cube has 6 layers per face set */
      depth /= 6;
      tic[2] |= G80_TIC_2_TEXTURE_TYPE_CUBEMAP;
      break;
   case PIPE_TEXTURE_1D_ARRAY:
      tic[2] |= G80_TIC_2_TEXTURE_TYPE_ONE_D_ARRAY;
      break;
   case PIPE_TEXTURE_2D_ARRAY:
      tic[2] |= G80_TIC_2_TEXTURE_TYPE_TWO_D_ARRAY;
      break;
   case PIPE_TEXTURE_CUBE_ARRAY:
      depth /= 6;
      tic[2] |= G80_TIC_2_TEXTURE_TYPE_CUBE_ARRAY;
      break;
   default:
      unreachable("unexpected/invalid texture target");
   }

   tic[3] = (flags & NV50_TEXVIEW_FILTER_MSAA8) ? 0x20000000 : 0x00300000;

   /* Resolve views address individual samples, so advertise the
    * sample-scaled dimensions. */
   if (flags & NV50_TEXVIEW_ACCESS_RESOLVE) {
      width = mt->base.base.width0 << mt->ms_x;
      height = mt->base.base.height0 << mt->ms_y;
   } else {
      width = mt->base.base.width0;
      height = mt->base.base.height0;
   }

   tic[4] = (1 << 31) | width;

   tic[5] = height & 0xffff;
   tic[5] |= depth << 16;
   tic[5] |= mt->base.base.last_level << 28;

   /* sampling points: (?) */
   if (flags & NV50_TEXVIEW_ACCESS_RESOLVE)
      tic[6] = (mt->ms_x > 1) ? 0x88000000 : 0x03000000;
   else
      tic[6] = 0x03000000;

   /* Word 7: mip level range and multisample mode. */
   tic[7] = (view->pipe.u.tex.last_level << 4) | view->pipe.u.tex.first_level;
   tic[7] |= mt->ms_mode << 12;

   return &view->pipe;
}
441 
442 struct pipe_sampler_view *
nvc0_create_texture_view(struct pipe_context * pipe,struct pipe_resource * texture,const struct pipe_sampler_view * templ,uint32_t flags,enum pipe_texture_target target)443 nvc0_create_texture_view(struct pipe_context *pipe,
444                          struct pipe_resource *texture,
445                          const struct pipe_sampler_view *templ,
446                          uint32_t flags,
447                          enum pipe_texture_target target)
448 {
449    if (nvc0_context(pipe)->screen->tic.maxwell)
450       return gm107_create_texture_view(pipe, texture, templ, flags, target);
451    return gf100_create_texture_view(pipe, texture, templ, flags, target);
452 }
453 
454 bool
nvc0_update_tic(struct nvc0_context * nvc0,struct nv50_tic_entry * tic,struct nv04_resource * res)455 nvc0_update_tic(struct nvc0_context *nvc0, struct nv50_tic_entry *tic,
456                 struct nv04_resource *res)
457 {
458    uint64_t address = res->address;
459    if (res->base.target != PIPE_BUFFER)
460       return false;
461    address += tic->pipe.u.buf.offset;
462    if (tic->tic[1] == (uint32_t)address &&
463        (tic->tic[2] & 0xff) == address >> 32)
464       return false;
465 
466    tic->tic[1] = address;
467    tic->tic[2] &= 0xffffff00;
468    tic->tic[2] |= address >> 32;
469 
470    if (tic->id >= 0) {
471       nvc0->base.push_data(&nvc0->base, nvc0->screen->txc, tic->id * 32,
472                            NV_VRAM_DOMAIN(&nvc0->screen->base), 32,
473                            tic->tic);
474       return true;
475    }
476 
477    return false;
478 }
479 
/* Validate the texture bindings of shader stage @s (s == 5 means the
 * compute stage) on pre-Kepler hardware: allocate/upload missing TIC
 * entries, emit BIND_TIC commands for dirty slots and unbind stale ones.
 * Returns true if a TIC entry was uploaded, in which case the caller
 * must flush the TIC cache.
 */
bool
nvc0_validate_tic(struct nvc0_context *nvc0, int s)
{
   uint32_t commands[32];
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   unsigned i;
   unsigned n = 0;
   bool need_flush = false;

   for (i = 0; i < nvc0->num_textures[s]; ++i) {
      struct nv50_tic_entry *tic = nv50_tic_entry(nvc0->textures[s][i]);
      struct nv04_resource *res;
      const bool dirty = !!(nvc0->textures_dirty[s] & (1 << i));

      if (!tic) {
         /* Empty slot: emit an unbind command if it was previously bound. */
         if (dirty)
            commands[n++] = (i << 1) | 0;
         continue;
      }
      res = nv04_resource(tic->pipe.texture);
      need_flush |= nvc0_update_tic(nvc0, tic, res);

      if (tic->id < 0) {
         /* First use: allocate a slot in the TIC pool and upload the
          * 32-byte header. */
         tic->id = nvc0_screen_tic_alloc(nvc0->screen, tic);

         nvc0->base.push_data(&nvc0->base, nvc0->screen->txc, tic->id * 32,
                              NV_VRAM_DOMAIN(&nvc0->screen->base), 32,
                              tic->tic);
         need_flush = true;
      } else
      if (res->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) {
         /* The GPU may have written the texture; invalidate its cached
          * texels for this TIC entry. */
         if (unlikely(s == 5))
            BEGIN_NVC0(push, NVC0_CP(TEX_CACHE_CTL), 1);
         else
            BEGIN_NVC0(push, NVC0_3D(TEX_CACHE_CTL), 1);
         PUSH_DATA (push, (tic->id << 4) | 1);
         NOUVEAU_DRV_STAT(&nvc0->screen->base, tex_cache_flush_count, 1);
      }
      /* Lock the slot so it cannot be evicted while bound. */
      nvc0->screen->tic.lock[tic->id / 32] |= 1 << (tic->id % 32);

      res->status &= ~NOUVEAU_BUFFER_STATUS_GPU_WRITING;
      res->status |=  NOUVEAU_BUFFER_STATUS_GPU_READING;

      if (!dirty)
         continue;
      /* BIND_TIC payload: TIC id, slot index, valid bit. */
      commands[n++] = (tic->id << 9) | (i << 1) | 1;

      if (unlikely(s == 5))
         BCTX_REFN(nvc0->bufctx_cp, CP_TEX(i), res, RD);
      else
         BCTX_REFN(nvc0->bufctx_3d, 3D_TEX(s, i), res, RD);
   }
   /* Unbind slots that were bound in the previous state but no longer are. */
   for (; i < nvc0->state.num_textures[s]; ++i)
      commands[n++] = (i << 1) | 0;

   nvc0->state.num_textures[s] = nvc0->num_textures[s];

   if (n) {
      if (unlikely(s == 5))
         BEGIN_NIC0(push, NVC0_CP(BIND_TIC), n);
      else
         BEGIN_NIC0(push, NVC0_3D(BIND_TIC(s)), n);
      PUSH_DATAp(push, commands, n);
   }
   nvc0->textures_dirty[s] = 0;

   return need_flush;
}
548 
/* Kepler+ variant of texture validation for stage @s: textures are
 * referenced through handles stored in the driver constant buffer
 * (see nve4_set_tex_handles()) instead of BIND_TIC methods, so this
 * only ensures TIC entries are resident and updates tex_handles.
 * Returns true if a TIC entry was uploaded (caller flushes TICs).
 */
static bool
nve4_validate_tic(struct nvc0_context *nvc0, unsigned s)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   unsigned i;
   bool need_flush = false;

   for (i = 0; i < nvc0->num_textures[s]; ++i) {
      struct nv50_tic_entry *tic = nv50_tic_entry(nvc0->textures[s][i]);
      struct nv04_resource *res;
      const bool dirty = !!(nvc0->textures_dirty[s] & (1 << i));

      if (!tic) {
         /* Empty slot: mark the handle's TIC field invalid. */
         nvc0->tex_handles[s][i] |= NVE4_TIC_ENTRY_INVALID;
         continue;
      }
      res = nv04_resource(tic->pipe.texture);
      need_flush |= nvc0_update_tic(nvc0, tic, res);

      if (tic->id < 0) {
         /* First use: allocate a TIC pool slot and upload the header. */
         tic->id = nvc0_screen_tic_alloc(nvc0->screen, tic);

         nvc0->base.push_data(&nvc0->base, nvc0->screen->txc, tic->id * 32,
                              NV_VRAM_DOMAIN(&nvc0->screen->base), 32,
                              tic->tic);
         need_flush = true;
      } else
      if (res->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) {
         /* GPU may have written this texture; invalidate its cached texels. */
         BEGIN_NVC0(push, NVC0_3D(TEX_CACHE_CTL), 1);
         PUSH_DATA (push, (tic->id << 4) | 1);
      }
      /* Lock the slot so it cannot be evicted while bound. */
      nvc0->screen->tic.lock[tic->id / 32] |= 1 << (tic->id % 32);

      res->status &= ~NOUVEAU_BUFFER_STATUS_GPU_WRITING;
      res->status |=  NOUVEAU_BUFFER_STATUS_GPU_READING;

      /* Store the TIC id in the low bits of the combined handle. */
      nvc0->tex_handles[s][i] &= ~NVE4_TIC_ENTRY_INVALID;
      nvc0->tex_handles[s][i] |= tic->id;
      if (dirty)
         BCTX_REFN(nvc0->bufctx_3d, 3D_TEX(s, i), res, RD);
   }
   /* Invalidate handles of slots bound in the previous state only. */
   for (; i < nvc0->state.num_textures[s]; ++i) {
      nvc0->tex_handles[s][i] |= NVE4_TIC_ENTRY_INVALID;
      nvc0->textures_dirty[s] |= 1 << i;
   }

   nvc0->state.num_textures[s] = nvc0->num_textures[s];

   return need_flush;
}
599 
nvc0_validate_textures(struct nvc0_context * nvc0)600 void nvc0_validate_textures(struct nvc0_context *nvc0)
601 {
602    bool need_flush = false;
603    int i;
604 
605    for (i = 0; i < 5; i++) {
606       if (nvc0->screen->base.class_3d >= NVE4_3D_CLASS)
607          need_flush |= nve4_validate_tic(nvc0, i);
608       else
609          need_flush |= nvc0_validate_tic(nvc0, i);
610    }
611 
612    if (need_flush) {
613       BEGIN_NVC0(nvc0->base.pushbuf, NVC0_3D(TIC_FLUSH), 1);
614       PUSH_DATA (nvc0->base.pushbuf, 0);
615    }
616 
617    /* Invalidate all CP textures because they are aliased. */
618    for (int i = 0; i < nvc0->num_textures[5]; i++)
619       nouveau_bufctx_reset(nvc0->bufctx_cp, NVC0_BIND_CP_TEX(i));
620    nvc0->textures_dirty[5] = ~0;
621    nvc0->dirty_cp |= NVC0_NEW_CP_TEXTURES;
622 }
623 
/* Validate the sampler bindings of shader stage @s (s == 5 means the
 * compute stage) on pre-Kepler hardware: upload missing TSC entries and
 * emit BIND_TSC commands for dirty slots.  Returns true if a TSC entry
 * was uploaded (caller must flush the TSC cache).
 */
bool
nvc0_validate_tsc(struct nvc0_context *nvc0, int s)
{
   uint32_t commands[16];
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   unsigned i;
   unsigned n = 0;
   bool need_flush = false;

   for (i = 0; i < nvc0->num_samplers[s]; ++i) {
      struct nv50_tsc_entry *tsc = nv50_tsc_entry(nvc0->samplers[s][i]);

      if (!(nvc0->samplers_dirty[s] & (1 << i)))
         continue;
      if (!tsc) {
         /* Empty slot: emit an unbind command. */
         commands[n++] = (i << 4) | 0;
         continue;
      }
      nvc0->seamless_cube_map = tsc->seamless_cube_map;
      if (tsc->id < 0) {
         /* First use: allocate a TSC pool slot and upload the 32-byte
          * entry (TSC entries start at offset 65536 in the txc buffer). */
         tsc->id = nvc0_screen_tsc_alloc(nvc0->screen, tsc);

         nvc0_m2mf_push_linear(&nvc0->base, nvc0->screen->txc,
                               65536 + tsc->id * 32, NV_VRAM_DOMAIN(&nvc0->screen->base),
                               32, tsc->tsc);
         need_flush = true;
      }
      /* Lock the slot so it cannot be evicted while bound. */
      nvc0->screen->tsc.lock[tsc->id / 32] |= 1 << (tsc->id % 32);

      /* BIND_TSC payload: TSC id, slot index, valid bit. */
      commands[n++] = (tsc->id << 12) | (i << 4) | 1;
   }
   /* Unbind slots bound in the previous state but no longer in use. */
   for (; i < nvc0->state.num_samplers[s]; ++i)
      commands[n++] = (i << 4) | 0;

   nvc0->state.num_samplers[s] = nvc0->num_samplers[s];

   // TXF, in unlinked tsc mode, will always use sampler 0. So we have to
   // ensure that it remains bound. Its contents don't matter, all samplers we
   // ever create have the SRGB_CONVERSION bit set, so as long as the first
   // entry is initialized, we're good to go. This is the only bit that has
   // any effect on what TXF does.
   if ((nvc0->samplers_dirty[s] & 1) && !nvc0->samplers[s][0]) {
      if (n == 0)
         n = 1;
      // We're guaranteed that the first command refers to the first slot, so
      // we're not overwriting a valid entry.
      commands[0] = (0 << 12) | (0 << 4) | 1;
   }

   if (n) {
      if (unlikely(s == 5))
         BEGIN_NIC0(push, NVC0_CP(BIND_TSC), n);
      else
         BEGIN_NIC0(push, NVC0_3D(BIND_TSC(s)), n);
      PUSH_DATAp(push, commands, n);
   }
   nvc0->samplers_dirty[s] = 0;

   return need_flush;
}
684 
/* Kepler+ variant of sampler validation for stage @s: samplers are
 * referenced through the combined texture/sampler handles (TSC id in
 * bits 20+) instead of BIND_TSC methods.  Returns true if a TSC entry
 * was uploaded (caller must flush the TSC cache).
 */
bool
nve4_validate_tsc(struct nvc0_context *nvc0, int s)
{
   unsigned i;
   bool need_flush = false;

   for (i = 0; i < nvc0->num_samplers[s]; ++i) {
      struct nv50_tsc_entry *tsc = nv50_tsc_entry(nvc0->samplers[s][i]);

      if (!tsc) {
         /* Empty slot: mark the handle's TSC field invalid. */
         nvc0->tex_handles[s][i] |= NVE4_TSC_ENTRY_INVALID;
         continue;
      }
      if (tsc->id < 0) {
         /* First use: allocate a TSC pool slot and upload the 32-byte
          * entry (TSC entries start at offset 65536 in the txc buffer). */
         tsc->id = nvc0_screen_tsc_alloc(nvc0->screen, tsc);

         nve4_p2mf_push_linear(&nvc0->base, nvc0->screen->txc,
                               65536 + tsc->id * 32,
                               NV_VRAM_DOMAIN(&nvc0->screen->base),
                               32, tsc->tsc);
         need_flush = true;
      }
      /* Lock the slot so it cannot be evicted while bound. */
      nvc0->screen->tsc.lock[tsc->id / 32] |= 1 << (tsc->id % 32);

      /* Store the TSC id in bits 20+ of the combined handle. */
      nvc0->tex_handles[s][i] &= ~NVE4_TSC_ENTRY_INVALID;
      nvc0->tex_handles[s][i] |= tsc->id << 20;
   }
   /* Invalidate handles of slots bound in the previous state only. */
   for (; i < nvc0->state.num_samplers[s]; ++i) {
      nvc0->tex_handles[s][i] |= NVE4_TSC_ENTRY_INVALID;
      nvc0->samplers_dirty[s] |= 1 << i;
   }

   nvc0->state.num_samplers[s] = nvc0->num_samplers[s];

   return need_flush;
}
721 
nvc0_validate_samplers(struct nvc0_context * nvc0)722 void nvc0_validate_samplers(struct nvc0_context *nvc0)
723 {
724    bool need_flush = false;
725    int i;
726 
727    for (i = 0; i < 5; i++) {
728       if (nvc0->screen->base.class_3d >= NVE4_3D_CLASS)
729          need_flush |= nve4_validate_tsc(nvc0, i);
730       else
731          need_flush |= nvc0_validate_tsc(nvc0, i);
732    }
733 
734    if (need_flush) {
735       BEGIN_NVC0(nvc0->base.pushbuf, NVC0_3D(TSC_FLUSH), 1);
736       PUSH_DATA (nvc0->base.pushbuf, 0);
737    }
738 
739    /* Invalidate all CP samplers because they are aliased. */
740    nvc0->samplers_dirty[5] = ~0;
741    nvc0->dirty_cp |= NVC0_NEW_CP_SAMPLERS;
742 }
743 
744 void
nvc0_upload_tsc0(struct nvc0_context * nvc0)745 nvc0_upload_tsc0(struct nvc0_context *nvc0)
746 {
747    struct nouveau_pushbuf *push = nvc0->base.pushbuf;
748    u32 data[8] = { G80_TSC_0_SRGB_CONVERSION };
749    nvc0->base.push_data(&nvc0->base, nvc0->screen->txc,
750                         65536 /*+ tsc->id * 32*/,
751                         NV_VRAM_DOMAIN(&nvc0->screen->base), 32, data);
752    BEGIN_NVC0(push, NVC0_3D(TSC_FLUSH), 1);
753    PUSH_DATA (push, 0);
754 }
755 
756 /* Upload the "diagonal" entries for the possible texture sources ($t == $s).
757  * At some point we might want to get a list of the combinations used by a
758  * shader and fill in those entries instead of having it extract the handles.
759  */
void
nve4_set_tex_handles(struct nvc0_context *nvc0)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct nvc0_screen *screen = nvc0->screen;
   unsigned s;

   /* Only Kepler+ uses handle-based texturing. */
   if (nvc0->screen->base.class_3d < NVE4_3D_CLASS)
      return;

   for (s = 0; s < 5; ++s) {
      /* A slot needs re-upload if either its texture or sampler changed. */
      uint32_t dirty = nvc0->textures_dirty[s] | nvc0->samplers_dirty[s];
      if (!dirty)
         continue;
      /* Select the per-stage driver auxiliary constant buffer. */
      BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
      PUSH_DATA (push, NVC0_CB_AUX_SIZE);
      PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
      PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
      do {
         /* Upload each dirty handle individually at its CB position. */
         int i = ffs(dirty) - 1;
         dirty &= ~(1 << i);

         BEGIN_NVC0(push, NVC0_3D(CB_POS), 2);
         PUSH_DATA (push, NVC0_CB_AUX_TEX_INFO(i));
         PUSH_DATA (push, nvc0->tex_handles[s][i]);
      } while (dirty);

      nvc0->textures_dirty[s] = 0;
      nvc0->samplers_dirty[s] = 0;
   }
}
791 
/* Create a bindless texture handle combining a TIC entry (low 20 bits)
 * and a TSC entry (bits 20-31).  Both entries are uploaded immediately
 * and locked so they cannot be evicted while the handle is live.
 * Returns 0 on allocation failure.
 */
static uint64_t
nve4_create_texture_handle(struct pipe_context *pipe,
                           struct pipe_sampler_view *view,
                           const struct pipe_sampler_state *sampler)
{
   /* We have to create persistent handles that won't change for these objects
    * That means that we have to upload them into place and lock them so that
    * they can't be kicked out later.
    */
   struct nvc0_context *nvc0 = nvc0_context(pipe);
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct nv50_tic_entry *tic = nv50_tic_entry(view);
   struct nv50_tsc_entry *tsc = pipe->create_sampler_state(pipe, sampler);
   struct pipe_sampler_view *v = NULL;

   tsc->id = nvc0_screen_tsc_alloc(nvc0->screen, tsc);
   if (tsc->id < 0)
      goto fail;

   if (tic->id < 0) {
      /* The view is not resident yet: allocate and upload its TIC entry. */
      tic->id = nvc0_screen_tic_alloc(nvc0->screen, tic);
      if (tic->id < 0)
         goto fail;

      nve4_p2mf_push_linear(&nvc0->base, nvc0->screen->txc, tic->id * 32,
                            NV_VRAM_DOMAIN(&nvc0->screen->base), 32,
                            tic->tic);

      IMMED_NVC0(push, NVC0_3D(TIC_FLUSH), 0);
   }

   /* Upload the TSC entry (TSC entries start at offset 65536 in txc). */
   nve4_p2mf_push_linear(&nvc0->base, nvc0->screen->txc,
                         65536 + tsc->id * 32,
                         NV_VRAM_DOMAIN(&nvc0->screen->base),
                         32, tsc->tsc);

   IMMED_NVC0(push, NVC0_3D(TSC_FLUSH), 0);

   // Add an extra reference to this sampler view effectively held by this
   // texture handle. This is to deal with the sampler view being dereferenced
   // before the handle is. However we need the view to still be live until the
   // handle to it is deleted.
   pipe_sampler_view_reference(&v, view);
   p_atomic_inc(&tic->bindless);

   /* Lock both pool slots for the lifetime of the handle. */
   nvc0->screen->tic.lock[tic->id / 32] |= 1 << (tic->id % 32);
   nvc0->screen->tsc.lock[tsc->id / 32] |= 1 << (tsc->id % 32);

   /* Bit 32 distinguishes a valid handle from the 0 failure value. */
   return 0x100000000ULL | (tsc->id << 20) | tic->id;

fail:
   pipe->delete_sampler_state(pipe, tsc);
   return 0;
}
846 
847 static bool
view_bound(struct nvc0_context * nvc0,struct pipe_sampler_view * view)848 view_bound(struct nvc0_context *nvc0, struct pipe_sampler_view *view) {
849    for (int s = 0; s < 6; s++) {
850       for (int i = 0; i < nvc0->num_textures[s]; i++)
851          if (nvc0->textures[s][i] == view)
852             return true;
853    }
854    return false;
855 }
856 
857 static void
nve4_delete_texture_handle(struct pipe_context * pipe,uint64_t handle)858 nve4_delete_texture_handle(struct pipe_context *pipe, uint64_t handle)
859 {
860    struct nvc0_context *nvc0 = nvc0_context(pipe);
861    uint32_t tic = handle & NVE4_TIC_ENTRY_INVALID;
862    uint32_t tsc = (handle & NVE4_TSC_ENTRY_INVALID) >> 20;
863    struct nv50_tic_entry *entry = nvc0->screen->tic.entries[tic];
864 
865    if (entry) {
866       struct pipe_sampler_view *view = &entry->pipe;
867       assert(entry->bindless);
868       p_atomic_dec(&entry->bindless);
869       if (!view_bound(nvc0, view))
870          nvc0_screen_tic_unlock(nvc0->screen, entry);
871       pipe_sampler_view_reference(&view, NULL);
872    }
873 
874    pipe->delete_sampler_state(pipe, nvc0->screen->tsc.entries[tsc]);
875 }
876 
877 static void
nve4_make_texture_handle_resident(struct pipe_context * pipe,uint64_t handle,bool resident)878 nve4_make_texture_handle_resident(struct pipe_context *pipe,
879                                   uint64_t handle, bool resident)
880 {
881    struct nvc0_context *nvc0 = nvc0_context(pipe);
882    if (resident) {
883       struct nvc0_resident *res = calloc(1, sizeof(struct nvc0_resident));
884       struct nv50_tic_entry *tic =
885          nvc0->screen->tic.entries[handle & NVE4_TIC_ENTRY_INVALID];
886       assert(tic);
887       assert(tic->bindless);
888 
889       res->handle = handle;
890       res->buf = nv04_resource(tic->pipe.texture);
891       res->flags = NOUVEAU_BO_RD;
892       list_add(&res->list, &nvc0->tex_head);
893    } else {
894       list_for_each_entry_safe(struct nvc0_resident, pos, &nvc0->tex_head, list) {
895          if (pos->handle == handle) {
896             list_del(&pos->list);
897             free(pos);
898             break;
899          }
900       }
901    }
902 }
903 
/* Forward declarations of the surface-format tables defined at the end of
 * this file; nve4_set_surface_info() needs them before their definitions. */
static const uint8_t nve4_su_format_map[PIPE_FORMAT_COUNT];
static const uint16_t nve4_su_format_aux_map[PIPE_FORMAT_COUNT];
static const uint16_t nve4_suldp_lib_offset[PIPE_FORMAT_COUNT];
907 
908 static void
nvc0_get_surface_dims(const struct pipe_image_view * view,int * width,int * height,int * depth)909 nvc0_get_surface_dims(const struct pipe_image_view *view,
910                       int *width, int *height, int *depth)
911 {
912    struct nv04_resource *res = nv04_resource(view->resource);
913    int level;
914 
915    *width = *height = *depth = 1;
916    if (res->base.target == PIPE_BUFFER) {
917       *width = view->u.buf.size / util_format_get_blocksize(view->format);
918       return;
919    }
920 
921    level = view->u.tex.level;
922    *width = u_minify(view->resource->width0, level);
923    *height = u_minify(view->resource->height0, level);
924    *depth = u_minify(view->resource->depth0, level);
925 
926    switch (res->base.target) {
927    case PIPE_TEXTURE_1D_ARRAY:
928    case PIPE_TEXTURE_2D_ARRAY:
929    case PIPE_TEXTURE_CUBE:
930    case PIPE_TEXTURE_CUBE_ARRAY:
931       *depth = view->u.tex.last_layer - view->u.tex.first_layer + 1;
932       break;
933    case PIPE_TEXTURE_1D:
934    case PIPE_TEXTURE_2D:
935    case PIPE_TEXTURE_RECT:
936    case PIPE_TEXTURE_3D:
937       break;
938    default:
939       assert(!"unexpected texture target");
940       break;
941    }
942 }
943 
944 void
nvc0_mark_image_range_valid(const struct pipe_image_view * view)945 nvc0_mark_image_range_valid(const struct pipe_image_view *view)
946 {
947    struct nv04_resource *res = (struct nv04_resource *)view->resource;
948 
949    assert(view->resource->target == PIPE_BUFFER);
950 
951    util_range_add(&res->base, &res->valid_buffer_range,
952                   view->u.buf.offset,
953                   view->u.buf.offset + view->u.buf.size);
954 }
955 
void
nve4_set_surface_info(struct nouveau_pushbuf *push,
                      const struct pipe_image_view *view,
                      struct nvc0_context *nvc0)
{
   /* Emit the 16-dword surface descriptor consumed by the NVE4 surface
    * access code into the pushbuf at push->cur (cur is advanced by 16). */
   struct nvc0_screen *screen = nvc0->screen;
   struct nv04_resource *res;
   uint64_t address;
   uint32_t *const info = push->cur;
   int width, height, depth;
   uint8_t log2cpp;

   if (view && !nve4_su_format_map[view->format])
      NOUVEAU_ERR("unsupported surface format, try is_format_supported() !\n");

   push->cur += 16;

   /* No view or unsupported format: emit a poisoned descriptor whose raw
    * loads still route through the RGBA32_UINT library entry point. */
   if (!view || !nve4_su_format_map[view->format]) {
      memset(info, 0, 16 * sizeof(*info));

      info[0] = 0xbadf0000;
      info[1] = 0x80004000;
      info[12] = nve4_suldp_lib_offset[PIPE_FORMAT_R32G32B32A32_UINT] +
         screen->lib_code->start;
      return;
   }
   res = nv04_resource(view->resource);

   address = res->address;

   /* get surface dimensions based on the target. */
   nvc0_get_surface_dims(view, &width, &height, &depth);

   /* Dimensions for the imageSize() builtin. */
   info[8] = width;
   info[9] = height;
   info[10] = depth;
   /* Dimensionality code derived from the resource target. */
   switch (res->base.target) {
   case PIPE_TEXTURE_1D_ARRAY:
      info[11] = 1;
      break;
   case PIPE_TEXTURE_2D:
   case PIPE_TEXTURE_RECT:
      info[11] = 2;
      break;
   case PIPE_TEXTURE_3D:
      info[11] = 3;
      break;
   case PIPE_TEXTURE_2D_ARRAY:
   case PIPE_TEXTURE_CUBE:
   case PIPE_TEXTURE_CUBE_ARRAY:
      info[11] = 4;
      break;
   default:
      info[11] = 0;
      break;
   }
   /* log2(bytes per pixel), packed in the top nibble of the aux map entry
    * (see the comment above nve4_su_format_aux_map). */
   log2cpp = (0xf000 & nve4_su_format_aux_map[view->format]) >> 12;

   /* Stick the blockwidth (ie. number of bytes per pixel) to check if the
    * format doesn't mismatch. */
   info[12] = util_format_get_blocksize(view->format);

   /* limit in bytes for raw access */
   info[13] = (0x06 << 22) | ((width << log2cpp) - 1);

   info[1] = nve4_su_format_map[view->format];

#if 0
   switch (util_format_get_blocksizebits(view->format)) {
   case  16: info[1] |= 1 << 16; break;
   case  32: info[1] |= 2 << 16; break;
   case  64: info[1] |= 3 << 16; break;
   case 128: info[1] |= 4 << 16; break;
   default:
      break;
   }
#else
   info[1] |= log2cpp << 16;
   info[1] |=  0x4000;
   info[1] |= (0x0f00 & nve4_su_format_aux_map[view->format]);
#endif

   if (res->base.target == PIPE_BUFFER) {
      address += view->u.buf.offset;

      info[0]  = address >> 8;
      info[2]  = width - 1;
      info[2] |= (0xff & nve4_su_format_aux_map[view->format]) << 22;
      info[3]  = 0;
      info[4]  = 0;
      info[5]  = 0;
      info[6]  = 0;
      info[7]  = 0;
      info[14] = 0;
      info[15] = 0;
   } else {
      struct nv50_miptree *mt = nv50_miptree(&res->base);
      struct nv50_miptree_level *lvl = &mt->level[view->u.tex.level];
      unsigned z = view->u.tex.first_layer;

      /* For non-3D layouts, bake the layer offset into the address and
       * address layer 0; true 3D layouts keep z for descriptor word 7. */
      if (!mt->layout_3d) {
         address += mt->layer_stride * z;
         z = 0;
      }

      address += lvl->offset;

      info[0]  = address >> 8;
      info[2]  = (width << mt->ms_x) - 1;
      /* NOTE: this is really important: */
      info[2] |= (0xff & nve4_su_format_aux_map[view->format]) << 22;
      info[3]  = (0x88 << 24) | (lvl->pitch / 64);
      info[4]  = (height << mt->ms_y) - 1;
      info[4] |= (lvl->tile_mode & 0x0f0) << 25;
      info[4] |= NVC0_TILE_SHIFT_Y(lvl->tile_mode) << 22;
      info[5]  = mt->layer_stride >> 8;
      info[6]  = depth - 1;
      info[6] |= (lvl->tile_mode & 0xf00) << 21;
      info[6] |= NVC0_TILE_SHIFT_Z(lvl->tile_mode) << 22;
      info[7]  = mt->layout_3d ? 1 : 0;
      info[7] |= z << 16;
      info[14] = mt->ms_x;
      info[15] = mt->ms_y;
   }
}
1081 
1082 static inline void
nvc0_set_surface_info(struct nouveau_pushbuf * push,const struct pipe_image_view * view,uint64_t address,int width,int height,int depth)1083 nvc0_set_surface_info(struct nouveau_pushbuf *push,
1084                       const struct pipe_image_view *view, uint64_t address,
1085                       int width, int height, int depth)
1086 {
1087    struct nv04_resource *res;
1088    uint32_t *const info = push->cur;
1089 
1090    push->cur += 16;
1091 
1092    /* Make sure to always initialize the surface information area because it's
1093     * used to check if the given image is bound or not. */
1094    memset(info, 0, 16 * sizeof(*info));
1095 
1096    if (!view || !view->resource)
1097       return;
1098    res = nv04_resource(view->resource);
1099 
1100    /* Stick the image dimensions for the imageSize() builtin. */
1101    info[8] = width;
1102    info[9] = height;
1103    info[10] = depth;
1104 
1105    /* Stick the blockwidth (ie. number of bytes per pixel) to calculate pixel
1106     * offset and to check if the format doesn't mismatch. */
1107    info[12] = util_format_get_blocksize(view->format);
1108 
1109    if (res->base.target == PIPE_BUFFER) {
1110       info[0]  = address >> 8;
1111       info[2]  = width;
1112    } else {
1113       struct nv50_miptree *mt = nv50_miptree(&res->base);
1114 
1115       info[0]  = address >> 8;
1116       info[2]  = width;
1117       info[4]  = height;
1118       info[5]  = mt->layer_stride >> 8;
1119       info[6]  = depth;
1120       info[14] = mt->ms_x;
1121       info[15] = mt->ms_y;
1122    }
1123 }
1124 
1125 void
nvc0_validate_suf(struct nvc0_context * nvc0,int s)1126 nvc0_validate_suf(struct nvc0_context *nvc0, int s)
1127 {
1128    struct nouveau_pushbuf *push = nvc0->base.pushbuf;
1129    struct nvc0_screen *screen = nvc0->screen;
1130 
1131    for (int i = 0; i < NVC0_MAX_IMAGES; ++i) {
1132       struct pipe_image_view *view = &nvc0->images[s][i];
1133       int width, height, depth;
1134       uint64_t address = 0;
1135 
1136       if (s == 5)
1137          BEGIN_NVC0(push, NVC0_CP(IMAGE(i)), 6);
1138       else
1139          BEGIN_NVC0(push, NVC0_3D(IMAGE(i)), 6);
1140 
1141       if (view->resource) {
1142          struct nv04_resource *res = nv04_resource(view->resource);
1143          unsigned rt = nvc0_format_table[view->format].rt;
1144 
1145          if (util_format_is_depth_or_stencil(view->format))
1146             rt = rt << 12;
1147          else
1148             rt = (rt << 4) | (0x14 << 12);
1149 
1150          /* get surface dimensions based on the target. */
1151          nvc0_get_surface_dims(view, &width, &height, &depth);
1152 
1153          address = res->address;
1154          if (res->base.target == PIPE_BUFFER) {
1155             unsigned blocksize = util_format_get_blocksize(view->format);
1156 
1157             address += view->u.buf.offset;
1158             assert(!(address & 0xff));
1159 
1160             if (view->access & PIPE_IMAGE_ACCESS_WRITE)
1161                nvc0_mark_image_range_valid(view);
1162 
1163             PUSH_DATAh(push, address);
1164             PUSH_DATA (push, address);
1165             PUSH_DATA (push, align(width * blocksize, 0x100));
1166             PUSH_DATA (push, NVC0_3D_IMAGE_HEIGHT_LINEAR | 1);
1167             PUSH_DATA (push, rt);
1168             PUSH_DATA (push, 0);
1169          } else {
1170             struct nv50_miptree *mt = nv50_miptree(view->resource);
1171             struct nv50_miptree_level *lvl = &mt->level[view->u.tex.level];
1172             const unsigned z = view->u.tex.first_layer;
1173 
1174             if (mt->layout_3d) {
1175                address += nvc0_mt_zslice_offset(mt, view->u.tex.level, z);
1176                if (depth >= 1) {
1177                   pipe_debug_message(&nvc0->base.debug, CONFORMANCE,
1178                                      "3D images are not supported!");
1179                   debug_printf("3D images are not supported!\n");
1180                }
1181             } else {
1182                address += mt->layer_stride * z;
1183             }
1184             address += lvl->offset;
1185 
1186             PUSH_DATAh(push, address);
1187             PUSH_DATA (push, address);
1188             PUSH_DATA (push, width << mt->ms_x);
1189             PUSH_DATA (push, height << mt->ms_y);
1190             PUSH_DATA (push, rt);
1191             PUSH_DATA (push, lvl->tile_mode & 0xff); /* mask out z-tiling */
1192          }
1193 
1194          if (s == 5)
1195             BCTX_REFN(nvc0->bufctx_cp, CP_SUF, res, RDWR);
1196          else
1197             BCTX_REFN(nvc0->bufctx_3d, 3D_SUF, res, RDWR);
1198       } else {
1199          PUSH_DATA(push, 0);
1200          PUSH_DATA(push, 0);
1201          PUSH_DATA(push, 0);
1202          PUSH_DATA(push, 0);
1203          PUSH_DATA(push, 0x14000);
1204          PUSH_DATA(push, 0);
1205       }
1206 
1207       /* stick surface information into the driver constant buffer */
1208       if (s == 5)
1209          BEGIN_NVC0(push, NVC0_CP(CB_SIZE), 3);
1210       else
1211          BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
1212       PUSH_DATA (push, NVC0_CB_AUX_SIZE);
1213       PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
1214       PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
1215       if (s == 5)
1216          BEGIN_1IC0(push, NVC0_CP(CB_POS), 1 + 16);
1217       else
1218          BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + 16);
1219       PUSH_DATA (push, NVC0_CB_AUX_SU_INFO(i));
1220 
1221       nvc0_set_surface_info(push, view, address, width, height, depth);
1222    }
1223 }
1224 
1225 static inline void
nvc0_update_surface_bindings(struct nvc0_context * nvc0)1226 nvc0_update_surface_bindings(struct nvc0_context *nvc0)
1227 {
1228    nvc0_validate_suf(nvc0, 4);
1229 
1230    /* Invalidate all COMPUTE images because they are aliased with FRAGMENT. */
1231    nouveau_bufctx_reset(nvc0->bufctx_cp, NVC0_BIND_CP_SUF);
1232    nvc0->dirty_cp |= NVC0_NEW_CP_SURFACES;
1233    nvc0->images_dirty[5] |= nvc0->images_valid[5];
1234 }
1235 
static void
gm107_validate_surfaces(struct nvc0_context *nvc0,
                        struct pipe_image_view *view, int stage, int slot)
{
   /* Ensure the TIC entry backing the image bound at (stage, slot) is
    * allocated and uploaded, then publish its id to the stage's aux
    * constant buffer (Maxwell accesses images through TIC handles). */
   struct nv04_resource *res = nv04_resource(view->resource);
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct nvc0_screen *screen = nvc0->screen;
   struct nv50_tic_entry *tic;

   tic = nv50_tic_entry(nvc0->images_tic[stage][slot]);

   /* NOTE(review): res is re-derived from the TIC's texture here, so the
    * initializer from view->resource above appears redundant — confirm. */
   res = nv04_resource(tic->pipe.texture);
   nvc0_update_tic(nvc0, tic, res);

   if (tic->id < 0) {
      tic->id = nvc0_screen_tic_alloc(nvc0->screen, tic);

      /* upload the texture view */
      nve4_p2mf_push_linear(&nvc0->base, nvc0->screen->txc, tic->id * 32,
                            NV_VRAM_DOMAIN(&nvc0->screen->base), 32, tic->tic);

      BEGIN_NVC0(push, NVC0_3D(TIC_FLUSH), 1);
      PUSH_DATA (push, 0);
   } else
   if (res->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) {
      /* Resource was written by the GPU: invalidate this TIC's tex cache. */
      BEGIN_NVC0(push, NVC0_3D(TEX_CACHE_CTL), 1);
      PUSH_DATA (push, (tic->id << 4) | 1);
   }
   /* Lock the TIC slot against eviction. */
   nvc0->screen->tic.lock[tic->id / 32] |= 1 << (tic->id % 32);

   res->status &= ~NOUVEAU_BUFFER_STATUS_GPU_WRITING;
   res->status |= NOUVEAU_BUFFER_STATUS_GPU_READING;

   BCTX_REFN(nvc0->bufctx_3d, 3D_SUF, res, RD);

   /* upload the texture handle */
   BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
   PUSH_DATA (push, NVC0_CB_AUX_SIZE);
   PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(stage));
   PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(stage));
   BEGIN_NVC0(push, NVC0_3D(CB_POS), 2);
   PUSH_DATA (push, NVC0_CB_AUX_TEX_INFO(slot + 32));
   PUSH_DATA (push, tic->id);
}
1280 
static inline void
nve4_update_surface_bindings(struct nvc0_context *nvc0)
{
   /* NVE4+ path: for every dirty 3D shader stage, re-upload the 16-dword
    * surface descriptor of each image slot into the stage's aux constbuf. */
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct nvc0_screen *screen = nvc0->screen;
   int i, j, s;

   for (s = 0; s < 5; s++) {
      if (!nvc0->images_dirty[s])
         continue;

      for (i = 0; i < NVC0_MAX_IMAGES; ++i) {
         struct pipe_image_view *view = &nvc0->images[s][i];

         /* Select the stage's aux buffer, then stream 16 dwords at the
          * slot's descriptor offset. */
         BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
         PUSH_DATA (push, NVC0_CB_AUX_SIZE);
         PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
         PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
         BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + 16);
         PUSH_DATA (push, NVC0_CB_AUX_SU_INFO(i));

         if (view->resource) {
            struct nv04_resource *res = nv04_resource(view->resource);

            /* Writable buffer images make their bound range valid. */
            if (res->base.target == PIPE_BUFFER) {
               if (view->access & PIPE_IMAGE_ACCESS_WRITE)
                  nvc0_mark_image_range_valid(view);
            }

            nve4_set_surface_info(push, view, nvc0);
            BCTX_REFN(nvc0->bufctx_3d, 3D_SUF, res, RDWR);

            /* Maxwell additionally needs a TIC entry per image. */
            if (nvc0->screen->base.class_3d >= GM107_3D_CLASS)
               gm107_validate_surfaces(nvc0, view, s, i);
         } else {
            /* Unbound slot: a zeroed descriptor marks it invalid. */
            for (j = 0; j < 16; j++)
               PUSH_DATA(push, 0);
         }
      }
   }
}
1322 
1323 void
nvc0_validate_surfaces(struct nvc0_context * nvc0)1324 nvc0_validate_surfaces(struct nvc0_context *nvc0)
1325 {
1326    if (nvc0->screen->base.class_3d >= NVE4_3D_CLASS) {
1327       nve4_update_surface_bindings(nvc0);
1328    } else {
1329       nvc0_update_surface_bindings(nvc0);
1330    }
1331 }
1332 
1333 static uint64_t
nve4_create_image_handle(struct pipe_context * pipe,const struct pipe_image_view * view)1334 nve4_create_image_handle(struct pipe_context *pipe,
1335                          const struct pipe_image_view *view)
1336 {
1337    struct nvc0_context *nvc0 = nvc0_context(pipe);
1338    struct nouveau_pushbuf *push = nvc0->base.pushbuf;
1339    struct nvc0_screen *screen = nvc0->screen;
1340    int i = screen->img.next, s;
1341 
1342    while (screen->img.entries[i]) {
1343       i = (i + 1) & (NVE4_IMG_MAX_HANDLES - 1);
1344       if (i == screen->img.next)
1345          return 0;
1346    }
1347 
1348    screen->img.next = (i + 1) & (NVE4_IMG_MAX_HANDLES - 1);
1349    screen->img.entries[i] = calloc(1, sizeof(struct pipe_image_view));
1350    *screen->img.entries[i] = *view;
1351 
1352    for (s = 0; s < 6; s++) {
1353       BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
1354       PUSH_DATA (push, NVC0_CB_AUX_SIZE);
1355       PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
1356       PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
1357       BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + 16);
1358       PUSH_DATA (push, NVC0_CB_AUX_BINDLESS_INFO(i));
1359       nve4_set_surface_info(push, view, nvc0);
1360    }
1361 
1362    return 0x100000000ULL | i;
1363 }
1364 
1365 static void
nve4_delete_image_handle(struct pipe_context * pipe,uint64_t handle)1366 nve4_delete_image_handle(struct pipe_context *pipe, uint64_t handle)
1367 {
1368    struct nvc0_context *nvc0 = nvc0_context(pipe);
1369    struct nvc0_screen *screen = nvc0->screen;
1370    int i = handle & (NVE4_IMG_MAX_HANDLES - 1);
1371 
1372    free(screen->img.entries[i]);
1373    screen->img.entries[i] = NULL;
1374 }
1375 
1376 static void
nve4_make_image_handle_resident(struct pipe_context * pipe,uint64_t handle,unsigned access,bool resident)1377 nve4_make_image_handle_resident(struct pipe_context *pipe, uint64_t handle,
1378                                 unsigned access, bool resident)
1379 {
1380    struct nvc0_context *nvc0 = nvc0_context(pipe);
1381    struct nvc0_screen *screen = nvc0->screen;
1382 
1383    if (resident) {
1384       struct nvc0_resident *res = calloc(1, sizeof(struct nvc0_resident));
1385       struct pipe_image_view *view =
1386          screen->img.entries[handle & (NVE4_IMG_MAX_HANDLES - 1)];
1387       assert(view);
1388 
1389       if (view->resource->target == PIPE_BUFFER &&
1390           access & PIPE_IMAGE_ACCESS_WRITE)
1391          nvc0_mark_image_range_valid(view);
1392       res->handle = handle;
1393       res->buf = nv04_resource(view->resource);
1394       res->flags = (access & 3) << 8;
1395       list_add(&res->list, &nvc0->img_head);
1396    } else {
1397       list_for_each_entry_safe(struct nvc0_resident, pos, &nvc0->img_head, list) {
1398          if (pos->handle == handle) {
1399             list_del(&pos->list);
1400             free(pos);
1401             break;
1402          }
1403       }
1404    }
1405 }
1406 
static uint64_t
gm107_create_image_handle(struct pipe_context *pipe,
                          const struct pipe_image_view *view)
{
   /* GM107+ use TIC handles to reference images. As such, image handles are
    * just the TIC id.
    */
   struct nvc0_context *nvc0 = nvc0_context(pipe);
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   /* Wrap the image in an internal sampler view so it gets a TIC entry. */
   struct pipe_sampler_view *sview =
      gm107_create_texture_view_from_image(pipe, view);
   struct nv50_tic_entry *tic = nv50_tic_entry(sview);

   if (tic == NULL)
      goto fail;

   tic->bindless = 1;
   tic->id = nvc0_screen_tic_alloc(nvc0->screen, tic);
   if (tic->id < 0)
      goto fail;

   /* Upload the 32-byte TIC entry into the texture constant buffer. */
   nve4_p2mf_push_linear(&nvc0->base, nvc0->screen->txc, tic->id * 32,
                         NV_VRAM_DOMAIN(&nvc0->screen->base), 32,
                         tic->tic);

   IMMED_NVC0(push, NVC0_3D(TIC_FLUSH), 0);

   /* Lock the slot so it can't be evicted while the handle is alive. */
   nvc0->screen->tic.lock[tic->id / 32] |= 1 << (tic->id % 32);

   // Compute handle. This will include the TIC as well as some additional
   // info regarding the bound 3d surface layer, if applicable.
   uint64_t handle = 0x100000000ULL | tic->id;
   struct nv04_resource *res = nv04_resource(view->resource);
   if (res->base.target == PIPE_TEXTURE_3D) {
      handle |= 1 << 11;
      handle |= view->u.tex.first_layer << (11 + 16);
   }
   return handle;

fail:
   /* NOTE(review): FREE(tic) releases the entry itself but not any resource
    * reference the view creation may have taken — confirm against
    * gm107_create_texture_view_from_image. */
   FREE(tic);
   return 0;
}
1450 
1451 static void
gm107_delete_image_handle(struct pipe_context * pipe,uint64_t handle)1452 gm107_delete_image_handle(struct pipe_context *pipe, uint64_t handle)
1453 {
1454    struct nvc0_context *nvc0 = nvc0_context(pipe);
1455    int tic = handle & NVE4_TIC_ENTRY_INVALID;
1456    struct nv50_tic_entry *entry = nvc0->screen->tic.entries[tic];
1457    struct pipe_sampler_view *view = &entry->pipe;
1458    assert(entry->bindless == 1);
1459    assert(!view_bound(nvc0, view));
1460    entry->bindless = 0;
1461    nvc0_screen_tic_unlock(nvc0->screen, entry);
1462    pipe_sampler_view_reference(&view, NULL);
1463 }
1464 
1465 static void
gm107_make_image_handle_resident(struct pipe_context * pipe,uint64_t handle,unsigned access,bool resident)1466 gm107_make_image_handle_resident(struct pipe_context *pipe, uint64_t handle,
1467                                  unsigned access, bool resident)
1468 {
1469    struct nvc0_context *nvc0 = nvc0_context(pipe);
1470 
1471    if (resident) {
1472       struct nvc0_resident *res = calloc(1, sizeof(struct nvc0_resident));
1473       struct nv50_tic_entry *tic =
1474          nvc0->screen->tic.entries[handle & NVE4_TIC_ENTRY_INVALID];
1475       assert(tic);
1476       assert(tic->bindless);
1477 
1478       res->handle = handle;
1479       res->buf = nv04_resource(tic->pipe.texture);
1480       res->flags = (access & 3) << 8;
1481       if (res->buf->base.target == PIPE_BUFFER &&
1482           access & PIPE_IMAGE_ACCESS_WRITE)
1483          util_range_add(&res->buf->base, &res->buf->valid_buffer_range,
1484                         tic->pipe.u.buf.offset,
1485                         tic->pipe.u.buf.offset + tic->pipe.u.buf.size);
1486       list_add(&res->list, &nvc0->img_head);
1487    } else {
1488       list_for_each_entry_safe(struct nvc0_resident, pos, &nvc0->img_head, list) {
1489          if (pos->handle == handle) {
1490             list_del(&pos->list);
1491             free(pos);
1492             break;
1493          }
1494       }
1495    }
1496 }
1497 
1498 void
nvc0_init_bindless_functions(struct pipe_context * pipe)1499 nvc0_init_bindless_functions(struct pipe_context *pipe) {
1500    pipe->create_texture_handle = nve4_create_texture_handle;
1501    pipe->delete_texture_handle = nve4_delete_texture_handle;
1502    pipe->make_texture_handle_resident = nve4_make_texture_handle_resident;
1503 
1504    if (nvc0_context(pipe)->screen->base.class_3d < GM107_3D_CLASS) {
1505       pipe->create_image_handle = nve4_create_image_handle;
1506       pipe->delete_image_handle = nve4_delete_image_handle;
1507       pipe->make_image_handle_resident = nve4_make_image_handle_resident;
1508    } else {
1509       pipe->create_image_handle = gm107_create_image_handle;
1510       pipe->delete_image_handle = gm107_delete_image_handle;
1511       pipe->make_image_handle_resident = gm107_make_image_handle_resident;
1512    }
1513 }
1514 
1515 
/* Map pipe formats to GK104 surface (image) format codes, used to fill
 * word 1 of the surface descriptor. Unlisted formats stay 0, which
 * nve4_set_surface_info() treats as unsupported. */
static const uint8_t nve4_su_format_map[PIPE_FORMAT_COUNT] =
{
   [PIPE_FORMAT_R32G32B32A32_FLOAT] = GK104_IMAGE_FORMAT_RGBA32_FLOAT,
   [PIPE_FORMAT_R32G32B32A32_SINT] = GK104_IMAGE_FORMAT_RGBA32_SINT,
   [PIPE_FORMAT_R32G32B32A32_UINT] = GK104_IMAGE_FORMAT_RGBA32_UINT,
   [PIPE_FORMAT_R16G16B16A16_FLOAT] = GK104_IMAGE_FORMAT_RGBA16_FLOAT,
   [PIPE_FORMAT_R16G16B16A16_UNORM] = GK104_IMAGE_FORMAT_RGBA16_UNORM,
   [PIPE_FORMAT_R16G16B16A16_SNORM] = GK104_IMAGE_FORMAT_RGBA16_SNORM,
   [PIPE_FORMAT_R16G16B16A16_SINT] = GK104_IMAGE_FORMAT_RGBA16_SINT,
   [PIPE_FORMAT_R16G16B16A16_UINT] = GK104_IMAGE_FORMAT_RGBA16_UINT,
   [PIPE_FORMAT_B8G8R8A8_UNORM] = GK104_IMAGE_FORMAT_BGRA8_UNORM,
   [PIPE_FORMAT_R8G8B8A8_UNORM] = GK104_IMAGE_FORMAT_RGBA8_UNORM,
   [PIPE_FORMAT_R8G8B8A8_SNORM] = GK104_IMAGE_FORMAT_RGBA8_SNORM,
   [PIPE_FORMAT_R8G8B8A8_SINT] = GK104_IMAGE_FORMAT_RGBA8_SINT,
   [PIPE_FORMAT_R8G8B8A8_UINT] = GK104_IMAGE_FORMAT_RGBA8_UINT,
   [PIPE_FORMAT_R11G11B10_FLOAT] = GK104_IMAGE_FORMAT_R11G11B10_FLOAT,
   [PIPE_FORMAT_R10G10B10A2_UNORM] = GK104_IMAGE_FORMAT_RGB10_A2_UNORM,
   [PIPE_FORMAT_R10G10B10A2_UINT] = GK104_IMAGE_FORMAT_RGB10_A2_UINT,
   [PIPE_FORMAT_R32G32_FLOAT] = GK104_IMAGE_FORMAT_RG32_FLOAT,
   [PIPE_FORMAT_R32G32_SINT] = GK104_IMAGE_FORMAT_RG32_SINT,
   [PIPE_FORMAT_R32G32_UINT] = GK104_IMAGE_FORMAT_RG32_UINT,
   [PIPE_FORMAT_R16G16_FLOAT] = GK104_IMAGE_FORMAT_RG16_FLOAT,
   [PIPE_FORMAT_R16G16_UNORM] = GK104_IMAGE_FORMAT_RG16_UNORM,
   [PIPE_FORMAT_R16G16_SNORM] = GK104_IMAGE_FORMAT_RG16_SNORM,
   [PIPE_FORMAT_R16G16_SINT] = GK104_IMAGE_FORMAT_RG16_SINT,
   [PIPE_FORMAT_R16G16_UINT] = GK104_IMAGE_FORMAT_RG16_UINT,
   [PIPE_FORMAT_R8G8_UNORM] = GK104_IMAGE_FORMAT_RG8_UNORM,
   [PIPE_FORMAT_R8G8_SNORM] = GK104_IMAGE_FORMAT_RG8_SNORM,
   [PIPE_FORMAT_R8G8_SINT] = GK104_IMAGE_FORMAT_RG8_SINT,
   [PIPE_FORMAT_R8G8_UINT] = GK104_IMAGE_FORMAT_RG8_UINT,
   [PIPE_FORMAT_R32_FLOAT] = GK104_IMAGE_FORMAT_R32_FLOAT,
   [PIPE_FORMAT_R32_SINT] = GK104_IMAGE_FORMAT_R32_SINT,
   [PIPE_FORMAT_R32_UINT] = GK104_IMAGE_FORMAT_R32_UINT,
   [PIPE_FORMAT_R16_FLOAT] = GK104_IMAGE_FORMAT_R16_FLOAT,
   [PIPE_FORMAT_R16_UNORM] = GK104_IMAGE_FORMAT_R16_UNORM,
   [PIPE_FORMAT_R16_SNORM] = GK104_IMAGE_FORMAT_R16_SNORM,
   [PIPE_FORMAT_R16_SINT] = GK104_IMAGE_FORMAT_R16_SINT,
   [PIPE_FORMAT_R16_UINT] = GK104_IMAGE_FORMAT_R16_UINT,
   [PIPE_FORMAT_R8_UNORM] = GK104_IMAGE_FORMAT_R8_UNORM,
   [PIPE_FORMAT_R8_SNORM] = GK104_IMAGE_FORMAT_R8_SNORM,
   [PIPE_FORMAT_R8_SINT] = GK104_IMAGE_FORMAT_R8_SINT,
   [PIPE_FORMAT_R8_UINT] = GK104_IMAGE_FORMAT_R8_UINT,
};
1559 
/* Auxiliary format description values for surface instructions.
 * (log2(bytes per pixel) << 12) | (unk8 << 8) | unk22
 * Consumed by nve4_set_surface_info() to fill descriptor words 1, 2 and 13.
 */
static const uint16_t nve4_su_format_aux_map[PIPE_FORMAT_COUNT] =
{
   [PIPE_FORMAT_R32G32B32A32_FLOAT] = 0x4842,
   [PIPE_FORMAT_R32G32B32A32_SINT] = 0x4842,
   [PIPE_FORMAT_R32G32B32A32_UINT] = 0x4842,

   [PIPE_FORMAT_R16G16B16A16_UNORM] = 0x3933,
   [PIPE_FORMAT_R16G16B16A16_SNORM] = 0x3933,
   [PIPE_FORMAT_R16G16B16A16_SINT] = 0x3933,
   [PIPE_FORMAT_R16G16B16A16_UINT] = 0x3933,
   [PIPE_FORMAT_R16G16B16A16_FLOAT] = 0x3933,

   [PIPE_FORMAT_R32G32_FLOAT] = 0x3433,
   [PIPE_FORMAT_R32G32_SINT] = 0x3433,
   [PIPE_FORMAT_R32G32_UINT] = 0x3433,

   [PIPE_FORMAT_R10G10B10A2_UNORM] = 0x2a24,
   [PIPE_FORMAT_R10G10B10A2_UINT] = 0x2a24,
   [PIPE_FORMAT_B8G8R8A8_UNORM] = 0x2a24,
   [PIPE_FORMAT_R8G8B8A8_UNORM] = 0x2a24,
   [PIPE_FORMAT_R8G8B8A8_SNORM] = 0x2a24,
   [PIPE_FORMAT_R8G8B8A8_SINT] = 0x2a24,
   [PIPE_FORMAT_R8G8B8A8_UINT] = 0x2a24,
   [PIPE_FORMAT_R11G11B10_FLOAT] = 0x2a24,

   [PIPE_FORMAT_R16G16_UNORM] = 0x2524,
   [PIPE_FORMAT_R16G16_SNORM] = 0x2524,
   [PIPE_FORMAT_R16G16_SINT] = 0x2524,
   [PIPE_FORMAT_R16G16_UINT] = 0x2524,
   [PIPE_FORMAT_R16G16_FLOAT] = 0x2524,

   [PIPE_FORMAT_R32_SINT] = 0x2024,
   [PIPE_FORMAT_R32_UINT] = 0x2024,
   [PIPE_FORMAT_R32_FLOAT] = 0x2024,

   [PIPE_FORMAT_R8G8_UNORM] = 0x1615,
   [PIPE_FORMAT_R8G8_SNORM] = 0x1615,
   [PIPE_FORMAT_R8G8_SINT] = 0x1615,
   [PIPE_FORMAT_R8G8_UINT] = 0x1615,

   [PIPE_FORMAT_R16_UNORM] = 0x1115,
   [PIPE_FORMAT_R16_SNORM] = 0x1115,
   [PIPE_FORMAT_R16_SINT] = 0x1115,
   [PIPE_FORMAT_R16_UINT] = 0x1115,
   [PIPE_FORMAT_R16_FLOAT] = 0x1115,

   [PIPE_FORMAT_R8_UNORM] = 0x0206,
   [PIPE_FORMAT_R8_SNORM] = 0x0206,
   [PIPE_FORMAT_R8_SINT] = 0x0206,
   [PIPE_FORMAT_R8_UINT] = 0x0206
};
1614 
/* NOTE: These are hardcoded offsets for the shader library.
 * TODO: Automate them.
 * Each value is the per-format entry point of the surface-load routine,
 * added to screen->lib_code->start in nve4_set_surface_info().
 */
static const uint16_t nve4_suldp_lib_offset[PIPE_FORMAT_COUNT] =
{
   [PIPE_FORMAT_R32G32B32A32_FLOAT] = 0x218,
   [PIPE_FORMAT_R32G32B32A32_SINT]  = 0x218,
   [PIPE_FORMAT_R32G32B32A32_UINT]  = 0x218,
   [PIPE_FORMAT_R16G16B16A16_UNORM] = 0x248,
   [PIPE_FORMAT_R16G16B16A16_SNORM] = 0x2b8,
   [PIPE_FORMAT_R16G16B16A16_SINT]  = 0x330,
   [PIPE_FORMAT_R16G16B16A16_UINT]  = 0x388,
   [PIPE_FORMAT_R16G16B16A16_FLOAT] = 0x3d8,
   [PIPE_FORMAT_R32G32_FLOAT]       = 0x428,
   [PIPE_FORMAT_R32G32_SINT]        = 0x468,
   [PIPE_FORMAT_R32G32_UINT]        = 0x468,
   [PIPE_FORMAT_R10G10B10A2_UNORM]  = 0x4a8,
   [PIPE_FORMAT_R10G10B10A2_UINT]   = 0x530,
   [PIPE_FORMAT_R8G8B8A8_UNORM]     = 0x588,
   [PIPE_FORMAT_R8G8B8A8_SNORM]     = 0x5f8,
   [PIPE_FORMAT_R8G8B8A8_SINT]      = 0x670,
   [PIPE_FORMAT_R8G8B8A8_UINT]      = 0x6c8,
   [PIPE_FORMAT_B5G6R5_UNORM]       = 0x718,
   [PIPE_FORMAT_B5G5R5X1_UNORM]     = 0x7a0,
   [PIPE_FORMAT_R16G16_UNORM]       = 0x828,
   [PIPE_FORMAT_R16G16_SNORM]       = 0x890,
   [PIPE_FORMAT_R16G16_SINT]        = 0x8f0,
   [PIPE_FORMAT_R16G16_UINT]        = 0x948,
   [PIPE_FORMAT_R16G16_FLOAT]       = 0x998,
   [PIPE_FORMAT_R32_FLOAT]          = 0x9e8,
   [PIPE_FORMAT_R32_SINT]           = 0xa30,
   [PIPE_FORMAT_R32_UINT]           = 0xa30,
   [PIPE_FORMAT_R8G8_UNORM]         = 0xa78,
   [PIPE_FORMAT_R8G8_SNORM]         = 0xae0,
   [PIPE_FORMAT_R8G8_UINT]          = 0xb48,
   [PIPE_FORMAT_R8G8_SINT]          = 0xb98,
   [PIPE_FORMAT_R16_UNORM]          = 0xbe8,
   [PIPE_FORMAT_R16_SNORM]          = 0xc48,
   [PIPE_FORMAT_R16_SINT]           = 0xca0,
   [PIPE_FORMAT_R16_UINT]           = 0xce8,
   [PIPE_FORMAT_R16_FLOAT]          = 0xd30,
   [PIPE_FORMAT_R8_UNORM]           = 0xd88,
   [PIPE_FORMAT_R8_SNORM]           = 0xde0,
   [PIPE_FORMAT_R8_SINT]            = 0xe38,
   [PIPE_FORMAT_R8_UINT]            = 0xe88,
   [PIPE_FORMAT_R11G11B10_FLOAT]    = 0xed0
};
1662