1 /*
2  * Copyright 2010 Christoph Bumiller
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
18  * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
19  * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20  * SOFTWARE.
21  */
22 
23 #include "util/u_format.h"
24 #include "util/u_format_s3tc.h"
25 #include "pipe/p_screen.h"
26 
27 #include "nv50_context.h"
28 #include "nv50_screen.h"
29 
30 #include "nouveau/nv_object.xml.h"
31 #include <errno.h>
32 
33 #ifndef NOUVEAU_GETPARAM_GRAPH_UNITS
34 # define NOUVEAU_GETPARAM_GRAPH_UNITS 13
35 #endif
36 
37 /* affected by LOCAL_WARPS_LOG_ALLOC / LOCAL_WARPS_NO_CLAMP */
38 #define LOCAL_WARPS_ALLOC 32
39 /* affected by STACK_WARPS_LOG_ALLOC / STACK_WARPS_NO_CLAMP */
40 #define STACK_WARPS_ALLOC 32
41 
42 #define THREADS_IN_WARP 32
43 
44 #define ONE_TEMP_SIZE (4/*vector*/ * sizeof(float))
45 
46 static boolean
nv50_screen_is_format_supported(struct pipe_screen * pscreen,enum pipe_format format,enum pipe_texture_target target,unsigned sample_count,unsigned bindings)47 nv50_screen_is_format_supported(struct pipe_screen *pscreen,
48                                 enum pipe_format format,
49                                 enum pipe_texture_target target,
50                                 unsigned sample_count,
51                                 unsigned bindings)
52 {
53    if (sample_count > 8)
54       return FALSE;
55    if (!(0x117 & (1 << sample_count))) /* 0, 1, 2, 4 or 8 */
56       return FALSE;
57    if (sample_count == 8 && util_format_get_blocksizebits(format) >= 128)
58       return FALSE;
59 
60    if (!util_format_is_supported(format, bindings))
61       return FALSE;
62 
63    switch (format) {
64    case PIPE_FORMAT_Z16_UNORM:
65       if (nv50_screen(pscreen)->tesla->oclass < NVA0_3D_CLASS)
66          return FALSE;
67       break;
68    case PIPE_FORMAT_R8G8B8A8_UNORM:
69    case PIPE_FORMAT_R8G8B8X8_UNORM:
70       /* HACK: GL requires equal formats for MS resolve and window is BGRA */
71       if (bindings & PIPE_BIND_RENDER_TARGET)
72          return FALSE;
73    default:
74       break;
75    }
76 
77    /* transfers & shared are always supported */
78    bindings &= ~(PIPE_BIND_TRANSFER_READ |
79                  PIPE_BIND_TRANSFER_WRITE |
80                  PIPE_BIND_SHARED);
81 
82    return (nv50_format_table[format].usage & bindings) == bindings;
83 }
84 
85 static int
nv50_screen_get_param(struct pipe_screen * pscreen,enum pipe_cap param)86 nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
87 {
88    const uint16_t class_3d = nouveau_screen(pscreen)->class_3d;
89 
90    switch (param) {
91    case PIPE_CAP_MAX_COMBINED_SAMPLERS:
92       return 64;
93    case PIPE_CAP_MAX_TEXTURE_2D_LEVELS:
94       return 14;
95    case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
96       return 12;
97    case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
98       return 14;
99    case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS:
100       return 512;
101    case PIPE_CAP_MIN_TEXEL_OFFSET:
102       return -8;
103    case PIPE_CAP_MAX_TEXEL_OFFSET:
104       return 7;
105    case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
106    case PIPE_CAP_TEXTURE_SWIZZLE:
107    case PIPE_CAP_TEXTURE_SHADOW_MAP:
108    case PIPE_CAP_NPOT_TEXTURES:
109    case PIPE_CAP_ANISOTROPIC_FILTER:
110    case PIPE_CAP_SCALED_RESOLVE:
111       return 1;
112    case PIPE_CAP_SEAMLESS_CUBE_MAP:
113       return nv50_screen(pscreen)->tesla->oclass >= NVA0_3D_CLASS;
114    case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
115       return 0;
116    case PIPE_CAP_TWO_SIDED_STENCIL:
117    case PIPE_CAP_DEPTH_CLIP_DISABLE:
118    case PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE:
119    case PIPE_CAP_POINT_SPRITE:
120       return 1;
121    case PIPE_CAP_SM3:
122       return 1;
123    case PIPE_CAP_GLSL_FEATURE_LEVEL:
124       return 130;
125    case PIPE_CAP_MAX_RENDER_TARGETS:
126       return 8;
127    case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS:
128       return 1;
129    case PIPE_CAP_FRAGMENT_COLOR_CLAMPED:
130    case PIPE_CAP_VERTEX_COLOR_UNCLAMPED:
131    case PIPE_CAP_VERTEX_COLOR_CLAMPED:
132       return 1;
133    case PIPE_CAP_QUERY_TIMESTAMP:
134    case PIPE_CAP_TIMER_QUERY:
135    case PIPE_CAP_OCCLUSION_QUERY:
136       return 1;
137    case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS:
138       return 4;
139    case PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS:
140    case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_COMPONENTS:
141       return 64;
142    case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME:
143       return (class_3d >= NVA0_3D_CLASS) ? 1 : 0;
144    case PIPE_CAP_BLEND_EQUATION_SEPARATE:
145    case PIPE_CAP_INDEP_BLEND_ENABLE:
146       return 1;
147    case PIPE_CAP_INDEP_BLEND_FUNC:
148       return nv50_screen(pscreen)->tesla->oclass >= NVA3_3D_CLASS;
149    case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT:
150    case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER:
151       return 1;
152    case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT:
153    case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER:
154       return 0;
155    case PIPE_CAP_SHADER_STENCIL_EXPORT:
156       return 0;
157    case PIPE_CAP_PRIMITIVE_RESTART:
158    case PIPE_CAP_TGSI_INSTANCEID:
159    case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR:
160    case PIPE_CAP_MIXED_COLORBUFFER_FORMATS:
161    case PIPE_CAP_CONDITIONAL_RENDER:
162    case PIPE_CAP_TEXTURE_BARRIER:
163    case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION:
164    case PIPE_CAP_START_INSTANCE:
165       return 1;
166    case PIPE_CAP_TGSI_CAN_COMPACT_VARYINGS:
167    case PIPE_CAP_TGSI_CAN_COMPACT_CONSTANTS:
168       return 0; /* state trackers will know better */
169    case PIPE_CAP_USER_CONSTANT_BUFFERS:
170    case PIPE_CAP_USER_INDEX_BUFFERS:
171    case PIPE_CAP_USER_VERTEX_BUFFERS:
172       return 1;
173    case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT:
174       return 256;
175    case PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY:
176    case PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY:
177    case PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY:
178       return 0;
179    default:
180       NOUVEAU_ERR("unknown PIPE_CAP %d\n", param);
181       return 0;
182    }
183 }
184 
185 static int
nv50_screen_get_shader_param(struct pipe_screen * pscreen,unsigned shader,enum pipe_shader_cap param)186 nv50_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
187                              enum pipe_shader_cap param)
188 {
189    switch (shader) {
190    case PIPE_SHADER_VERTEX:
191    case PIPE_SHADER_GEOMETRY:
192    case PIPE_SHADER_FRAGMENT:
193       break;
194    default:
195       return 0;
196    }
197 
198    switch (param) {
199    case PIPE_SHADER_CAP_MAX_INSTRUCTIONS:
200    case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS:
201    case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS:
202    case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS:
203       return 16384;
204    case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH:
205       return 4;
206    case PIPE_SHADER_CAP_MAX_INPUTS:
207       if (shader == PIPE_SHADER_VERTEX)
208          return 32;
209       return 0x300 / 16;
210    case PIPE_SHADER_CAP_MAX_CONSTS:
211       return 65536 / 16;
212    case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:
213       return NV50_MAX_PIPE_CONSTBUFS;
214    case PIPE_SHADER_CAP_MAX_ADDRS:
215       return 1;
216    case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR:
217    case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR:
218       return shader != PIPE_SHADER_FRAGMENT;
219    case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR:
220    case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR:
221       return 1;
222    case PIPE_SHADER_CAP_MAX_PREDS:
223       return 0;
224    case PIPE_SHADER_CAP_MAX_TEMPS:
225       return nv50_screen(pscreen)->max_tls_space / ONE_TEMP_SIZE;
226    case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED:
227       return 1;
228    case PIPE_SHADER_CAP_SUBROUTINES:
229       return 0; /* please inline, or provide function declarations */
230    case PIPE_SHADER_CAP_INTEGERS:
231       return 1;
232    case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS:
233       return 32;
234    default:
235       NOUVEAU_ERR("unknown PIPE_SHADER_CAP %d\n", param);
236       return 0;
237    }
238 }
239 
240 static float
nv50_screen_get_paramf(struct pipe_screen * pscreen,enum pipe_capf param)241 nv50_screen_get_paramf(struct pipe_screen *pscreen, enum pipe_capf param)
242 {
243    switch (param) {
244    case PIPE_CAPF_MAX_LINE_WIDTH:
245    case PIPE_CAPF_MAX_LINE_WIDTH_AA:
246       return 10.0f;
247    case PIPE_CAPF_MAX_POINT_WIDTH:
248    case PIPE_CAPF_MAX_POINT_WIDTH_AA:
249       return 64.0f;
250    case PIPE_CAPF_MAX_TEXTURE_ANISOTROPY:
251       return 16.0f;
252    case PIPE_CAPF_MAX_TEXTURE_LOD_BIAS:
253       return 4.0f;
254    default:
255       NOUVEAU_ERR("unknown PIPE_CAP %d\n", param);
256       return 0.0f;
257    }
258 }
259 
260 static void
nv50_screen_destroy(struct pipe_screen * pscreen)261 nv50_screen_destroy(struct pipe_screen *pscreen)
262 {
263    struct nv50_screen *screen = nv50_screen(pscreen);
264 
265    if (screen->base.fence.current) {
266       nouveau_fence_wait(screen->base.fence.current);
267       nouveau_fence_ref (NULL, &screen->base.fence.current);
268    }
269    if (screen->base.pushbuf)
270       screen->base.pushbuf->user_priv = NULL;
271 
272    if (screen->blitctx)
273       FREE(screen->blitctx);
274 
275    nouveau_bo_ref(NULL, &screen->code);
276    nouveau_bo_ref(NULL, &screen->tls_bo);
277    nouveau_bo_ref(NULL, &screen->stack_bo);
278    nouveau_bo_ref(NULL, &screen->txc);
279    nouveau_bo_ref(NULL, &screen->uniforms);
280    nouveau_bo_ref(NULL, &screen->fence.bo);
281 
282    nouveau_heap_destroy(&screen->vp_code_heap);
283    nouveau_heap_destroy(&screen->gp_code_heap);
284    nouveau_heap_destroy(&screen->fp_code_heap);
285 
286    if (screen->tic.entries)
287       FREE(screen->tic.entries);
288 
289    nouveau_object_del(&screen->tesla);
290    nouveau_object_del(&screen->eng2d);
291    nouveau_object_del(&screen->m2mf);
292    nouveau_object_del(&screen->sync);
293 
294    nouveau_screen_fini(&screen->base);
295 
296    FREE(screen);
297 }
298 
299 static void
nv50_screen_fence_emit(struct pipe_screen * pscreen,u32 * sequence)300 nv50_screen_fence_emit(struct pipe_screen *pscreen, u32 *sequence)
301 {
302    struct nv50_screen *screen = nv50_screen(pscreen);
303    struct nouveau_pushbuf *push = screen->base.pushbuf;
304 
305    /* we need to do it after possible flush in MARK_RING */
306    *sequence = ++screen->base.fence.sequence;
307 
308    PUSH_DATA (push, NV50_FIFO_PKHDR(NV50_3D(QUERY_ADDRESS_HIGH), 4));
309    PUSH_DATAh(push, screen->fence.bo->offset);
310    PUSH_DATA (push, screen->fence.bo->offset);
311    PUSH_DATA (push, *sequence);
312    PUSH_DATA (push, NV50_3D_QUERY_GET_MODE_WRITE_UNK0 |
313                     NV50_3D_QUERY_GET_UNK4 |
314                     NV50_3D_QUERY_GET_UNIT_CROP |
315                     NV50_3D_QUERY_GET_TYPE_QUERY |
316                     NV50_3D_QUERY_GET_QUERY_SELECT_ZERO |
317                     NV50_3D_QUERY_GET_SHORT);
318 }
319 
320 static u32
nv50_screen_fence_update(struct pipe_screen * pscreen)321 nv50_screen_fence_update(struct pipe_screen *pscreen)
322 {
323    return nv50_screen(pscreen)->fence.map[0];
324 }
325 
326 static void
nv50_screen_init_hwctx(struct nv50_screen * screen)327 nv50_screen_init_hwctx(struct nv50_screen *screen)
328 {
329    struct nouveau_pushbuf *push = screen->base.pushbuf;
330    struct nv04_fifo *fifo;
331    unsigned i;
332 
333    fifo = (struct nv04_fifo *)screen->base.channel->data;
334 
335    BEGIN_NV04(push, SUBC_M2MF(NV01_SUBCHAN_OBJECT), 1);
336    PUSH_DATA (push, screen->m2mf->handle);
337    BEGIN_NV04(push, SUBC_M2MF(NV03_M2MF_DMA_NOTIFY), 3);
338    PUSH_DATA (push, screen->sync->handle);
339    PUSH_DATA (push, fifo->vram);
340    PUSH_DATA (push, fifo->vram);
341 
342    BEGIN_NV04(push, SUBC_2D(NV01_SUBCHAN_OBJECT), 1);
343    PUSH_DATA (push, screen->eng2d->handle);
344    BEGIN_NV04(push, NV50_2D(DMA_NOTIFY), 4);
345    PUSH_DATA (push, screen->sync->handle);
346    PUSH_DATA (push, fifo->vram);
347    PUSH_DATA (push, fifo->vram);
348    PUSH_DATA (push, fifo->vram);
349    BEGIN_NV04(push, NV50_2D(OPERATION), 1);
350    PUSH_DATA (push, NV50_2D_OPERATION_SRCCOPY);
351    BEGIN_NV04(push, NV50_2D(CLIP_ENABLE), 1);
352    PUSH_DATA (push, 0);
353    BEGIN_NV04(push, NV50_2D(COLOR_KEY_ENABLE), 1);
354    PUSH_DATA (push, 0);
355    BEGIN_NV04(push, SUBC_2D(0x0888), 1);
356    PUSH_DATA (push, 1);
357 
358    BEGIN_NV04(push, SUBC_3D(NV01_SUBCHAN_OBJECT), 1);
359    PUSH_DATA (push, screen->tesla->handle);
360 
361    BEGIN_NV04(push, NV50_3D(COND_MODE), 1);
362    PUSH_DATA (push, NV50_3D_COND_MODE_ALWAYS);
363 
364    BEGIN_NV04(push, NV50_3D(DMA_NOTIFY), 1);
365    PUSH_DATA (push, screen->sync->handle);
366    BEGIN_NV04(push, NV50_3D(DMA_ZETA), 11);
367    for (i = 0; i < 11; ++i)
368       PUSH_DATA(push, fifo->vram);
369    BEGIN_NV04(push, NV50_3D(DMA_COLOR(0)), NV50_3D_DMA_COLOR__LEN);
370    for (i = 0; i < NV50_3D_DMA_COLOR__LEN; ++i)
371       PUSH_DATA(push, fifo->vram);
372 
373    BEGIN_NV04(push, NV50_3D(REG_MODE), 1);
374    PUSH_DATA (push, NV50_3D_REG_MODE_STRIPED);
375    BEGIN_NV04(push, NV50_3D(UNK1400_LANES), 1);
376    PUSH_DATA (push, 0xf);
377 
378    if (debug_get_bool_option("NOUVEAU_SHADER_WATCHDOG", TRUE)) {
379       BEGIN_NV04(push, NV50_3D(WATCHDOG_TIMER), 1);
380       PUSH_DATA (push, 0x18);
381    }
382 
383    BEGIN_NV04(push, NV50_3D(RT_CONTROL), 1);
384    PUSH_DATA (push, 1);
385 
386    BEGIN_NV04(push, NV50_3D(CSAA_ENABLE), 1);
387    PUSH_DATA (push, 0);
388    BEGIN_NV04(push, NV50_3D(MULTISAMPLE_ENABLE), 1);
389    PUSH_DATA (push, 0);
390    BEGIN_NV04(push, NV50_3D(MULTISAMPLE_MODE), 1);
391    PUSH_DATA (push, NV50_3D_MULTISAMPLE_MODE_MS1);
392    BEGIN_NV04(push, NV50_3D(MULTISAMPLE_CTRL), 1);
393    PUSH_DATA (push, 0);
394    BEGIN_NV04(push, NV50_3D(LINE_LAST_PIXEL), 1);
395    PUSH_DATA (push, 0);
396    BEGIN_NV04(push, NV50_3D(BLEND_SEPARATE_ALPHA), 1);
397    PUSH_DATA (push, 1);
398 
399    if (screen->tesla->oclass >= NVA0_3D_CLASS) {
400       BEGIN_NV04(push, SUBC_3D(NVA0_3D_TEX_MISC), 1);
401       PUSH_DATA (push, NVA0_3D_TEX_MISC_SEAMLESS_CUBE_MAP);
402    }
403 
404    BEGIN_NV04(push, NV50_3D(SCREEN_Y_CONTROL), 1);
405    PUSH_DATA (push, 0);
406    BEGIN_NV04(push, NV50_3D(WINDOW_OFFSET_X), 2);
407    PUSH_DATA (push, 0);
408    PUSH_DATA (push, 0);
409    BEGIN_NV04(push, NV50_3D(ZCULL_REGION), 1);
410    PUSH_DATA (push, 0x3f);
411 
412    BEGIN_NV04(push, NV50_3D(VP_ADDRESS_HIGH), 2);
413    PUSH_DATAh(push, screen->code->offset + (0 << NV50_CODE_BO_SIZE_LOG2));
414    PUSH_DATA (push, screen->code->offset + (0 << NV50_CODE_BO_SIZE_LOG2));
415 
416    BEGIN_NV04(push, NV50_3D(FP_ADDRESS_HIGH), 2);
417    PUSH_DATAh(push, screen->code->offset + (1 << NV50_CODE_BO_SIZE_LOG2));
418    PUSH_DATA (push, screen->code->offset + (1 << NV50_CODE_BO_SIZE_LOG2));
419 
420    BEGIN_NV04(push, NV50_3D(GP_ADDRESS_HIGH), 2);
421    PUSH_DATAh(push, screen->code->offset + (2 << NV50_CODE_BO_SIZE_LOG2));
422    PUSH_DATA (push, screen->code->offset + (2 << NV50_CODE_BO_SIZE_LOG2));
423 
424    BEGIN_NV04(push, NV50_3D(LOCAL_ADDRESS_HIGH), 3);
425    PUSH_DATAh(push, screen->tls_bo->offset);
426    PUSH_DATA (push, screen->tls_bo->offset);
427    PUSH_DATA (push, util_logbase2(screen->cur_tls_space / 8));
428 
429    BEGIN_NV04(push, NV50_3D(STACK_ADDRESS_HIGH), 3);
430    PUSH_DATAh(push, screen->stack_bo->offset);
431    PUSH_DATA (push, screen->stack_bo->offset);
432    PUSH_DATA (push, 4);
433 
434    BEGIN_NV04(push, NV50_3D(CB_DEF_ADDRESS_HIGH), 3);
435    PUSH_DATAh(push, screen->uniforms->offset + (0 << 16));
436    PUSH_DATA (push, screen->uniforms->offset + (0 << 16));
437    PUSH_DATA (push, (NV50_CB_PVP << 16) | 0x0000);
438 
439    BEGIN_NV04(push, NV50_3D(CB_DEF_ADDRESS_HIGH), 3);
440    PUSH_DATAh(push, screen->uniforms->offset + (1 << 16));
441    PUSH_DATA (push, screen->uniforms->offset + (1 << 16));
442    PUSH_DATA (push, (NV50_CB_PGP << 16) | 0x0000);
443 
444    BEGIN_NV04(push, NV50_3D(CB_DEF_ADDRESS_HIGH), 3);
445    PUSH_DATAh(push, screen->uniforms->offset + (2 << 16));
446    PUSH_DATA (push, screen->uniforms->offset + (2 << 16));
447    PUSH_DATA (push, (NV50_CB_PFP << 16) | 0x0000);
448 
449    BEGIN_NV04(push, NV50_3D(CB_DEF_ADDRESS_HIGH), 3);
450    PUSH_DATAh(push, screen->uniforms->offset + (3 << 16));
451    PUSH_DATA (push, screen->uniforms->offset + (3 << 16));
452    PUSH_DATA (push, (NV50_CB_AUX << 16) | 0x0200);
453 
454    BEGIN_NI04(push, NV50_3D(SET_PROGRAM_CB), 3);
455    PUSH_DATA (push, (NV50_CB_AUX << 12) | 0xf01);
456    PUSH_DATA (push, (NV50_CB_AUX << 12) | 0xf21);
457    PUSH_DATA (push, (NV50_CB_AUX << 12) | 0xf31);
458 
459    /* return { 0.0, 0.0, 0.0, 0.0 } on out-of-bounds vtxbuf access */
460    BEGIN_NV04(push, NV50_3D(CB_ADDR), 1);
461    PUSH_DATA (push, ((1 << 9) << 6) | NV50_CB_AUX);
462    BEGIN_NI04(push, NV50_3D(CB_DATA(0)), 4);
463    PUSH_DATAf(push, 0.0f);
464    PUSH_DATAf(push, 0.0f);
465    PUSH_DATAf(push, 0.0f);
466    PUSH_DATAf(push, 0.0f);
467    BEGIN_NV04(push, NV50_3D(VERTEX_RUNOUT_ADDRESS_HIGH), 2);
468    PUSH_DATAh(push, screen->uniforms->offset + (3 << 16) + (1 << 9));
469    PUSH_DATA (push, screen->uniforms->offset + (3 << 16) + (1 << 9));
470 
471    /* max TIC (bits 4:8) & TSC bindings, per program type */
472    for (i = 0; i < 3; ++i) {
473       BEGIN_NV04(push, NV50_3D(TEX_LIMITS(i)), 1);
474       PUSH_DATA (push, 0x54);
475    }
476 
477    BEGIN_NV04(push, NV50_3D(TIC_ADDRESS_HIGH), 3);
478    PUSH_DATAh(push, screen->txc->offset);
479    PUSH_DATA (push, screen->txc->offset);
480    PUSH_DATA (push, NV50_TIC_MAX_ENTRIES - 1);
481 
482    BEGIN_NV04(push, NV50_3D(TSC_ADDRESS_HIGH), 3);
483    PUSH_DATAh(push, screen->txc->offset + 65536);
484    PUSH_DATA (push, screen->txc->offset + 65536);
485    PUSH_DATA (push, NV50_TSC_MAX_ENTRIES - 1);
486 
487    BEGIN_NV04(push, NV50_3D(LINKED_TSC), 1);
488    PUSH_DATA (push, 0);
489 
490    BEGIN_NV04(push, NV50_3D(CLIP_RECTS_EN), 1);
491    PUSH_DATA (push, 0);
492    BEGIN_NV04(push, NV50_3D(CLIP_RECTS_MODE), 1);
493    PUSH_DATA (push, NV50_3D_CLIP_RECTS_MODE_INSIDE_ANY);
494    BEGIN_NV04(push, NV50_3D(CLIP_RECT_HORIZ(0)), 8 * 2);
495    for (i = 0; i < 8 * 2; ++i)
496       PUSH_DATA(push, 0);
497    BEGIN_NV04(push, NV50_3D(CLIPID_ENABLE), 1);
498    PUSH_DATA (push, 0);
499 
500    BEGIN_NV04(push, NV50_3D(VIEWPORT_TRANSFORM_EN), 1);
501    PUSH_DATA (push, 1);
502    BEGIN_NV04(push, NV50_3D(DEPTH_RANGE_NEAR(0)), 2);
503    PUSH_DATAf(push, 0.0f);
504    PUSH_DATAf(push, 1.0f);
505 
506    BEGIN_NV04(push, NV50_3D(VIEW_VOLUME_CLIP_CTRL), 1);
507 #ifdef NV50_SCISSORS_CLIPPING
508    PUSH_DATA (push, 0x0000);
509 #else
510    PUSH_DATA (push, 0x1080);
511 #endif
512 
513    BEGIN_NV04(push, NV50_3D(CLEAR_FLAGS), 1);
514    PUSH_DATA (push, NV50_3D_CLEAR_FLAGS_CLEAR_RECT_VIEWPORT);
515 
516    /* We use scissors instead of exact view volume clipping,
517     * so they're always enabled.
518     */
519    BEGIN_NV04(push, NV50_3D(SCISSOR_ENABLE(0)), 3);
520    PUSH_DATA (push, 1);
521    PUSH_DATA (push, 8192 << 16);
522    PUSH_DATA (push, 8192 << 16);
523 
524    BEGIN_NV04(push, NV50_3D(RASTERIZE_ENABLE), 1);
525    PUSH_DATA (push, 1);
526    BEGIN_NV04(push, NV50_3D(POINT_RASTER_RULES), 1);
527    PUSH_DATA (push, NV50_3D_POINT_RASTER_RULES_OGL);
528    BEGIN_NV04(push, NV50_3D(FRAG_COLOR_CLAMP_EN), 1);
529    PUSH_DATA (push, 0x11111111);
530    BEGIN_NV04(push, NV50_3D(EDGEFLAG), 1);
531    PUSH_DATA (push, 1);
532 
533    PUSH_KICK (push);
534 }
535 
nv50_tls_alloc(struct nv50_screen * screen,unsigned tls_space,uint64_t * tls_size)536 static int nv50_tls_alloc(struct nv50_screen *screen, unsigned tls_space,
537       uint64_t *tls_size)
538 {
539    struct nouveau_device *dev = screen->base.device;
540    int ret;
541 
542    screen->cur_tls_space = util_next_power_of_two(tls_space / ONE_TEMP_SIZE) *
543          ONE_TEMP_SIZE;
544    if (nouveau_mesa_debug)
545       debug_printf("allocating space for %u temps\n",
546             util_next_power_of_two(tls_space / ONE_TEMP_SIZE));
547    *tls_size = screen->cur_tls_space * util_next_power_of_two(screen->TPs) *
548          screen->MPsInTP * LOCAL_WARPS_ALLOC * THREADS_IN_WARP;
549 
550    ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 16,
551                         *tls_size, NULL, &screen->tls_bo);
552    if (ret) {
553       NOUVEAU_ERR("Failed to allocate local bo: %d\n", ret);
554       return ret;
555    }
556 
557    return 0;
558 }
559 
nv50_tls_realloc(struct nv50_screen * screen,unsigned tls_space)560 int nv50_tls_realloc(struct nv50_screen *screen, unsigned tls_space)
561 {
562    struct nouveau_pushbuf *push = screen->base.pushbuf;
563    int ret;
564    uint64_t tls_size;
565 
566    if (tls_space < screen->cur_tls_space)
567       return 0;
568    if (tls_space > screen->max_tls_space) {
569       /* fixable by limiting number of warps (LOCAL_WARPS_LOG_ALLOC /
570        * LOCAL_WARPS_NO_CLAMP) */
571       NOUVEAU_ERR("Unsupported number of temporaries (%u > %u). Fixable if someone cares.\n",
572             (unsigned)(tls_space / ONE_TEMP_SIZE),
573             (unsigned)(screen->max_tls_space / ONE_TEMP_SIZE));
574       return -ENOMEM;
575    }
576 
577    nouveau_bo_ref(NULL, &screen->tls_bo);
578    ret = nv50_tls_alloc(screen, tls_space, &tls_size);
579    if (ret)
580       return ret;
581 
582    BEGIN_NV04(push, NV50_3D(LOCAL_ADDRESS_HIGH), 3);
583    PUSH_DATAh(push, screen->tls_bo->offset);
584    PUSH_DATA (push, screen->tls_bo->offset);
585    PUSH_DATA (push, util_logbase2(screen->cur_tls_space / 8));
586 
587    return 1;
588 }
589 
590 struct pipe_screen *
nv50_screen_create(struct nouveau_device * dev)591 nv50_screen_create(struct nouveau_device *dev)
592 {
593    struct nv50_screen *screen;
594    struct pipe_screen *pscreen;
595    struct nouveau_object *chan;
596    uint64_t value;
597    uint32_t tesla_class;
598    unsigned stack_size;
599    int ret;
600 
601    screen = CALLOC_STRUCT(nv50_screen);
602    if (!screen)
603       return NULL;
604    pscreen = &screen->base.base;
605 
606    ret = nouveau_screen_init(&screen->base, dev);
607    if (ret) {
608       NOUVEAU_ERR("nouveau_screen_init failed: %d\n", ret);
609       goto fail;
610    }
611 
612    /* TODO: Prevent FIFO prefetch before transfer of index buffers and
613     *  admit them to VRAM.
614     */
615    screen->base.vidmem_bindings |= PIPE_BIND_CONSTANT_BUFFER |
616       PIPE_BIND_VERTEX_BUFFER;
617    screen->base.sysmem_bindings |=
618       PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER;
619 
620    screen->base.pushbuf->user_priv = screen;
621    screen->base.pushbuf->rsvd_kick = 5;
622 
623    chan = screen->base.channel;
624 
625    pscreen->destroy = nv50_screen_destroy;
626    pscreen->context_create = nv50_create;
627    pscreen->is_format_supported = nv50_screen_is_format_supported;
628    pscreen->get_param = nv50_screen_get_param;
629    pscreen->get_shader_param = nv50_screen_get_shader_param;
630    pscreen->get_paramf = nv50_screen_get_paramf;
631 
632    nv50_screen_init_resource_functions(pscreen);
633 
634    nouveau_screen_init_vdec(&screen->base);
635 
636    ret = nouveau_bo_new(dev, NOUVEAU_BO_GART | NOUVEAU_BO_MAP, 0, 4096,
637                         NULL, &screen->fence.bo);
638    if (ret) {
639       NOUVEAU_ERR("Failed to allocate fence bo: %d\n", ret);
640       goto fail;
641    }
642 
643    nouveau_bo_map(screen->fence.bo, 0, NULL);
644    screen->fence.map = screen->fence.bo->map;
645    screen->base.fence.emit = nv50_screen_fence_emit;
646    screen->base.fence.update = nv50_screen_fence_update;
647 
648    ret = nouveau_object_new(chan, 0xbeef0301, NOUVEAU_NOTIFIER_CLASS,
649                             &(struct nv04_notify){ .length = 32 },
650                             sizeof(struct nv04_notify), &screen->sync);
651    if (ret) {
652       NOUVEAU_ERR("Failed to allocate notifier: %d\n", ret);
653       goto fail;
654    }
655 
656    ret = nouveau_object_new(chan, 0xbeef5039, NV50_M2MF_CLASS,
657                             NULL, 0, &screen->m2mf);
658    if (ret) {
659       NOUVEAU_ERR("Failed to allocate PGRAPH context for M2MF: %d\n", ret);
660       goto fail;
661    }
662 
663    ret = nouveau_object_new(chan, 0xbeef502d, NV50_2D_CLASS,
664                             NULL, 0, &screen->eng2d);
665    if (ret) {
666       NOUVEAU_ERR("Failed to allocate PGRAPH context for 2D: %d\n", ret);
667       goto fail;
668    }
669 
670    switch (dev->chipset & 0xf0) {
671    case 0x50:
672       tesla_class = NV50_3D_CLASS;
673       break;
674    case 0x80:
675    case 0x90:
676       tesla_class = NV84_3D_CLASS;
677       break;
678    case 0xa0:
679       switch (dev->chipset) {
680       case 0xa0:
681       case 0xaa:
682       case 0xac:
683          tesla_class = NVA0_3D_CLASS;
684          break;
685       case 0xaf:
686          tesla_class = NVAF_3D_CLASS;
687          break;
688       default:
689          tesla_class = NVA3_3D_CLASS;
690          break;
691       }
692       break;
693    default:
694       NOUVEAU_ERR("Not a known NV50 chipset: NV%02x\n", dev->chipset);
695       goto fail;
696    }
697    screen->base.class_3d = tesla_class;
698 
699    ret = nouveau_object_new(chan, 0xbeef5097, tesla_class,
700                             NULL, 0, &screen->tesla);
701    if (ret) {
702       NOUVEAU_ERR("Failed to allocate PGRAPH context for 3D: %d\n", ret);
703       goto fail;
704    }
705 
706    ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 16,
707                         3 << NV50_CODE_BO_SIZE_LOG2, NULL, &screen->code);
708    if (ret) {
709       NOUVEAU_ERR("Failed to allocate code bo: %d\n", ret);
710       goto fail;
711    }
712 
713    nouveau_heap_init(&screen->vp_code_heap, 0, 1 << NV50_CODE_BO_SIZE_LOG2);
714    nouveau_heap_init(&screen->gp_code_heap, 0, 1 << NV50_CODE_BO_SIZE_LOG2);
715    nouveau_heap_init(&screen->fp_code_heap, 0, 1 << NV50_CODE_BO_SIZE_LOG2);
716 
717    nouveau_getparam(dev, NOUVEAU_GETPARAM_GRAPH_UNITS, &value);
718 
719    screen->TPs = util_bitcount(value & 0xffff);
720    screen->MPsInTP = util_bitcount((value >> 24) & 0xf);
721 
722    stack_size = util_next_power_of_two(screen->TPs) * screen->MPsInTP *
723          STACK_WARPS_ALLOC * 64 * 8;
724 
725    ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 16, stack_size, NULL,
726                         &screen->stack_bo);
727    if (ret) {
728       NOUVEAU_ERR("Failed to allocate stack bo: %d\n", ret);
729       goto fail;
730    }
731 
732    uint64_t size_of_one_temp = util_next_power_of_two(screen->TPs) *
733          screen->MPsInTP * LOCAL_WARPS_ALLOC *  THREADS_IN_WARP *
734          ONE_TEMP_SIZE;
735    screen->max_tls_space = dev->vram_size / size_of_one_temp * ONE_TEMP_SIZE;
736    screen->max_tls_space /= 2; /* half of vram */
737 
738    /* hw can address max 64 KiB */
739    screen->max_tls_space = MIN2(screen->max_tls_space, 64 << 10);
740 
741    uint64_t tls_size;
742    unsigned tls_space = 4/*temps*/ * ONE_TEMP_SIZE;
743    ret = nv50_tls_alloc(screen, tls_space, &tls_size);
744    if (ret)
745       goto fail;
746 
747    if (nouveau_mesa_debug)
748       debug_printf("TPs = %u, MPsInTP = %u, VRAM = %"PRIu64" MiB, tls_size = %"PRIu64" KiB\n",
749             screen->TPs, screen->MPsInTP, dev->vram_size >> 20, tls_size >> 10);
750 
751    ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 16, 4 << 16, NULL,
752                         &screen->uniforms);
753    if (ret) {
754       NOUVEAU_ERR("Failed to allocate uniforms bo: %d\n", ret);
755       goto fail;
756    }
757 
758    ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 16, 3 << 16, NULL,
759                         &screen->txc);
760    if (ret) {
761       NOUVEAU_ERR("Failed to allocate TIC/TSC bo: %d\n", ret);
762       goto fail;
763    }
764 
765    screen->tic.entries = CALLOC(4096, sizeof(void *));
766    screen->tsc.entries = screen->tic.entries + 2048;
767 
768    if (!nv50_blitctx_create(screen))
769       goto fail;
770 
771    nv50_screen_init_hwctx(screen);
772 
773    nouveau_fence_new(&screen->base, &screen->base.fence.current, FALSE);
774 
775    return pscreen;
776 
777 fail:
778    nv50_screen_destroy(pscreen);
779    return NULL;
780 }
781 
782 int
nv50_screen_tic_alloc(struct nv50_screen * screen,void * entry)783 nv50_screen_tic_alloc(struct nv50_screen *screen, void *entry)
784 {
785    int i = screen->tic.next;
786 
787    while (screen->tic.lock[i / 32] & (1 << (i % 32)))
788       i = (i + 1) & (NV50_TIC_MAX_ENTRIES - 1);
789 
790    screen->tic.next = (i + 1) & (NV50_TIC_MAX_ENTRIES - 1);
791 
792    if (screen->tic.entries[i])
793       nv50_tic_entry(screen->tic.entries[i])->id = -1;
794 
795    screen->tic.entries[i] = entry;
796    return i;
797 }
798 
799 int
nv50_screen_tsc_alloc(struct nv50_screen * screen,void * entry)800 nv50_screen_tsc_alloc(struct nv50_screen *screen, void *entry)
801 {
802    int i = screen->tsc.next;
803 
804    while (screen->tsc.lock[i / 32] & (1 << (i % 32)))
805       i = (i + 1) & (NV50_TSC_MAX_ENTRIES - 1);
806 
807    screen->tsc.next = (i + 1) & (NV50_TSC_MAX_ENTRIES - 1);
808 
809    if (screen->tsc.entries[i])
810       nv50_tsc_entry(screen->tsc.entries[i])->id = -1;
811 
812    screen->tsc.entries[i] = entry;
813    return i;
814 }
815