1 /*
2  Copyright 2003 VMware, Inc.
3  Copyright (C) Intel Corp.  2006.  All Rights Reserved.
4  Intel funded Tungsten Graphics to
5  develop this 3D driver.
6 
7  Permission is hereby granted, free of charge, to any person obtaining
8  a copy of this software and associated documentation files (the
9  "Software"), to deal in the Software without restriction, including
10  without limitation the rights to use, copy, modify, merge, publish,
11  distribute, sublicense, and/or sell copies of the Software, and to
12  permit persons to whom the Software is furnished to do so, subject to
13  the following conditions:
14 
15  The above copyright notice and this permission notice (including the
16  next paragraph) shall be included in all copies or substantial
17  portions of the Software.
18 
19  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
20  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
22  IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
23  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
24  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
25  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 
27  **********************************************************************/
28  /*
29   * Authors:
30   *   Keith Whitwell <keithw@vmware.com>
31   */
32 
33 
34 #include "compiler/nir/nir.h"
35 #include "main/api_exec.h"
36 #include "main/context.h"
37 #include "main/fbobject.h"
38 #include "main/extensions.h"
39 #include "main/glthread.h"
40 #include "main/macros.h"
41 #include "main/points.h"
42 #include "main/version.h"
43 #include "main/vtxfmt.h"
44 #include "main/texobj.h"
45 #include "main/framebuffer.h"
46 #include "main/stencil.h"
47 #include "main/state.h"
48 #include "main/spirv_extensions.h"
49 
50 #include "vbo/vbo.h"
51 
52 #include "drivers/common/driverfuncs.h"
53 #include "drivers/common/meta.h"
54 #include "utils.h"
55 
56 #include "brw_context.h"
57 #include "brw_defines.h"
58 #include "brw_blorp.h"
59 #include "brw_draw.h"
60 #include "brw_state.h"
61 
62 #include "intel_batchbuffer.h"
63 #include "intel_buffer_objects.h"
64 #include "intel_buffers.h"
65 #include "intel_fbo.h"
66 #include "intel_mipmap_tree.h"
67 #include "intel_pixel.h"
68 #include "intel_image.h"
69 #include "intel_tex.h"
70 #include "intel_tex_obj.h"
71 
72 #include "swrast_setup/swrast_setup.h"
73 #include "tnl/tnl.h"
74 #include "tnl/t_pipeline.h"
75 #include "util/ralloc.h"
76 #include "util/debug.h"
77 #include "util/disk_cache.h"
78 #include "util/u_memory.h"
79 #include "isl/isl.h"
80 
81 #include "common/gen_defines.h"
82 
83 #include "compiler/spirv/nir_spirv.h"
84 /***************************************
85  * Mesa's Driver Functions
86  ***************************************/
87 
/* Vendor name string; intel_get_string() returns it for GL_VENDOR queries. */
const char *const brw_vendor_string = "Intel Open Source Technology Center";
89 
90 static const char *
get_bsw_model(const struct intel_screen * screen)91 get_bsw_model(const struct intel_screen *screen)
92 {
93    switch (screen->eu_total) {
94    case 16:
95       return "405";
96    case 12:
97       return "400";
98    default:
99       return "   ";
100    }
101 }
102 
103 const char *
brw_get_renderer_string(const struct intel_screen * screen)104 brw_get_renderer_string(const struct intel_screen *screen)
105 {
106    static char buf[128];
107    const char *name = gen_get_device_name(screen->deviceID);
108 
109    if (!name)
110       name = "Intel Unknown";
111 
112    snprintf(buf, sizeof(buf), "Mesa DRI %s", name);
113 
114    /* Braswell branding is funny, so we have to fix it up here */
115    if (screen->deviceID == 0x22B1) {
116       char *needle = strstr(buf, "XXX");
117       if (needle)
118          memcpy(needle, get_bsw_model(screen), 3);
119    }
120 
121    return buf;
122 }
123 
124 static const GLubyte *
intel_get_string(struct gl_context * ctx,GLenum name)125 intel_get_string(struct gl_context * ctx, GLenum name)
126 {
127    const struct brw_context *const brw = brw_context(ctx);
128 
129    switch (name) {
130    case GL_VENDOR:
131       return (GLubyte *) brw_vendor_string;
132 
133    case GL_RENDERER:
134       return
135          (GLubyte *) brw_get_renderer_string(brw->screen);
136 
137    default:
138       return NULL;
139    }
140 }
141 
142 static void
brw_set_background_context(struct gl_context * ctx,UNUSED struct util_queue_monitoring * queue_info)143 brw_set_background_context(struct gl_context *ctx,
144                            UNUSED struct util_queue_monitoring *queue_info)
145 {
146    struct brw_context *brw = brw_context(ctx);
147    __DRIcontext *driContext = brw->driContext;
148    __DRIscreen *driScreen = driContext->driScreenPriv;
149    const __DRIbackgroundCallableExtension *backgroundCallable =
150       driScreen->dri2.backgroundCallable;
151 
152    /* Note: Mesa will only call this function if we've called
153     * _mesa_enable_multithreading().  We only do that if the loader exposed
154     * the __DRI_BACKGROUND_CALLABLE extension.  So we know that
155     * backgroundCallable is not NULL.
156     */
157    backgroundCallable->setBackgroundContext(driContext->loaderPrivate);
158 }
159 
160 static void
intel_viewport(struct gl_context * ctx)161 intel_viewport(struct gl_context *ctx)
162 {
163    struct brw_context *brw = brw_context(ctx);
164    __DRIcontext *driContext = brw->driContext;
165 
166    if (_mesa_is_winsys_fbo(ctx->DrawBuffer)) {
167       if (driContext->driDrawablePriv)
168          dri2InvalidateDrawable(driContext->driDrawablePriv);
169       if (driContext->driReadablePriv)
170          dri2InvalidateDrawable(driContext->driReadablePriv);
171    }
172 }
173 
174 static void
intel_update_framebuffer(struct gl_context * ctx,struct gl_framebuffer * fb)175 intel_update_framebuffer(struct gl_context *ctx,
176                          struct gl_framebuffer *fb)
177 {
178    struct brw_context *brw = brw_context(ctx);
179 
180    /* Quantize the derived default number of samples
181     */
182    fb->DefaultGeometry._NumSamples =
183       intel_quantize_num_samples(brw->screen,
184                                  fb->DefaultGeometry.NumSamples);
185 }
186 
187 static void
intel_update_state(struct gl_context * ctx)188 intel_update_state(struct gl_context * ctx)
189 {
190    GLuint new_state = ctx->NewState;
191    struct brw_context *brw = brw_context(ctx);
192 
193    if (ctx->swrast_context)
194       _swrast_InvalidateState(ctx, new_state);
195 
196    brw->NewGLState |= new_state;
197 
198    if (new_state & (_NEW_SCISSOR | _NEW_BUFFERS | _NEW_VIEWPORT))
199       _mesa_update_draw_buffer_bounds(ctx, ctx->DrawBuffer);
200 
201    if (new_state & (_NEW_STENCIL | _NEW_BUFFERS)) {
202       brw->stencil_enabled = _mesa_stencil_is_enabled(ctx);
203       brw->stencil_two_sided = _mesa_stencil_is_two_sided(ctx);
204       brw->stencil_write_enabled =
205          _mesa_stencil_is_write_enabled(ctx, brw->stencil_two_sided);
206    }
207 
208    if (new_state & _NEW_POLYGON)
209       brw->polygon_front_bit = _mesa_polygon_get_front_bit(ctx);
210 
211    if (new_state & _NEW_BUFFERS) {
212       intel_update_framebuffer(ctx, ctx->DrawBuffer);
213       if (ctx->DrawBuffer != ctx->ReadBuffer)
214          intel_update_framebuffer(ctx, ctx->ReadBuffer);
215    }
216 }
217 
/* Evaluates to the flushFrontBuffer member of the screen's image loader when
 * image.loader is non-NULL, and to that of its dri2 loader otherwise.  The
 * `screen` argument is expanded more than once, so pass a side-effect-free
 * expression.
 */
#define flushFront(screen)      ((screen)->image.loader ? (screen)->image.loader->flushFrontBuffer : (screen)->dri2.loader->flushFrontBuffer)
219 
220 static void
intel_flush_front(struct gl_context * ctx)221 intel_flush_front(struct gl_context *ctx)
222 {
223    struct brw_context *brw = brw_context(ctx);
224    __DRIcontext *driContext = brw->driContext;
225    __DRIdrawable *driDrawable = driContext->driDrawablePriv;
226    __DRIscreen *const dri_screen = brw->screen->driScrnPriv;
227 
228    if (brw->front_buffer_dirty && _mesa_is_winsys_fbo(ctx->DrawBuffer)) {
229       if (flushFront(dri_screen) && driDrawable &&
230           driDrawable->loaderPrivate) {
231 
232          /* Resolve before flushing FAKE_FRONT_LEFT to FRONT_LEFT.
233           *
234           * This potentially resolves both front and back buffer. It
235           * is unnecessary to resolve the back, but harms nothing except
236           * performance. And no one cares about front-buffer render
237           * performance.
238           */
239          intel_resolve_for_dri2_flush(brw, driDrawable);
240          intel_batchbuffer_flush(brw);
241 
242          flushFront(dri_screen)(driDrawable, driDrawable->loaderPrivate);
243 
244          /* We set the dirty bit in intel_prepare_render() if we're
245           * front buffer rendering once we get there.
246           */
247          brw->front_buffer_dirty = false;
248       }
249    }
250 }
251 
252 static void
brw_display_shared_buffer(struct brw_context * brw)253 brw_display_shared_buffer(struct brw_context *brw)
254 {
255    __DRIcontext *dri_context = brw->driContext;
256    __DRIdrawable *dri_drawable = dri_context->driDrawablePriv;
257    __DRIscreen *dri_screen = brw->screen->driScrnPriv;
258    int fence_fd = -1;
259 
260    if (!brw->is_shared_buffer_bound)
261       return;
262 
263    if (!brw->is_shared_buffer_dirty)
264       return;
265 
266    if (brw->screen->has_exec_fence) {
267       /* This function is always called during a flush operation, so there is
268        * no need to flush again here. But we want to provide a fence_fd to the
269        * loader, and a redundant flush is the easiest way to acquire one.
270        */
271       if (intel_batchbuffer_flush_fence(brw, -1, &fence_fd))
272          return;
273    }
274 
275    dri_screen->mutableRenderBuffer.loader
276       ->displaySharedBuffer(dri_drawable, fence_fd,
277                             dri_drawable->loaderPrivate);
278    brw->is_shared_buffer_dirty = false;
279 }
280 
281 static void
intel_glFlush(struct gl_context * ctx)282 intel_glFlush(struct gl_context *ctx)
283 {
284    struct brw_context *brw = brw_context(ctx);
285 
286    intel_batchbuffer_flush(brw);
287    intel_flush_front(ctx);
288    brw_display_shared_buffer(brw);
289    brw->need_flush_throttle = true;
290 }
291 
292 static void
intel_glEnable(struct gl_context * ctx,GLenum cap,GLboolean state)293 intel_glEnable(struct gl_context *ctx, GLenum cap, GLboolean state)
294 {
295    struct brw_context *brw = brw_context(ctx);
296 
297    switch (cap) {
298    case GL_BLACKHOLE_RENDER_INTEL:
299       brw->frontend_noop = state;
300       intel_batchbuffer_flush(brw);
301       intel_batchbuffer_maybe_noop(brw);
302       /* Because we started previous batches with a potential
303        * MI_BATCH_BUFFER_END if NOOP was enabled, that means that anything
304        * that was ever emitted after that never made it to the HW. So when the
305        * blackhole state changes from NOOP->!NOOP reupload the entire state.
306        */
307       if (!brw->frontend_noop) {
308          brw->NewGLState = ~0u;
309          brw->ctx.NewDriverState = ~0ull;
310       }
311       break;
312    default:
313       break;
314    }
315 }
316 
317 static void
intel_finish(struct gl_context * ctx)318 intel_finish(struct gl_context * ctx)
319 {
320    struct brw_context *brw = brw_context(ctx);
321 
322    intel_glFlush(ctx);
323 
324    if (brw->batch.last_bo)
325       brw_bo_wait_rendering(brw->batch.last_bo);
326 }
327 
/**
 * Populate a dd_function_table with this driver's hooks.
 *
 * The table is first filled by _mesa_init_driver_functions() and then
 * overridden: directly for the hooks defined in this file, and through the
 * per-area init helpers for the rest.  The query object and transform
 * feedback hooks are picked according to the hardware generation (and, for
 * transform feedback, according to can_do_mi_math_and_lrr()).
 *
 * \param brw        context whose screen and device info select the hooks.
 * \param functions  table to fill in.
 */
static void
brw_init_driver_functions(struct brw_context *brw,
                          struct dd_function_table *functions)
{
   const struct gen_device_info *devinfo = &brw->screen->devinfo;

   _mesa_init_driver_functions(functions);

   /* GLX uses DRI2 invalidate events to handle window resizing.
    * Unfortunately, EGL does not - libEGL is written in XCB (not Xlib),
    * which doesn't provide a mechanism for snooping the event queues.
    *
    * So EGL still relies on viewport hacks to handle window resizing.
    * This should go away with DRI3000.
    */
   if (!brw->driContext->driScreenPriv->dri2.useInvalidate)
      functions->Viewport = intel_viewport;

   /* Hooks implemented in this file. */
   functions->Enable = intel_glEnable;
   functions->Flush = intel_glFlush;
   functions->Finish = intel_finish;
   functions->GetString = intel_get_string;
   functions->UpdateState = intel_update_state;

   /* Hooks installed by helpers defined elsewhere. */
   brw_init_draw_functions(functions);
   intelInitTextureFuncs(functions);
   intelInitTextureImageFuncs(functions);
   intelInitTextureCopyImageFuncs(functions);
   intelInitCopyImageFuncs(functions);
   intelInitClearFuncs(functions);
   intelInitBufferFuncs(functions);
   intelInitPixelFuncs(functions);
   intelInitBufferObjectFuncs(functions);
   brw_init_syncobj_functions(functions);
   brw_init_object_purgeable_functions(functions);

   brwInitFragProgFuncs( functions );
   /* Common query object hooks first, then the generation-specific set:
    * gen8+ or Haswell, otherwise gen6+, otherwise the gen4 variant.
    */
   brw_init_common_queryobj_functions(functions);
   if (devinfo->gen >= 8 || devinfo->is_haswell)
      hsw_init_queryobj_functions(functions);
   else if (devinfo->gen >= 6)
      gen6_init_queryobj_functions(functions);
   else
      gen4_init_queryobj_functions(functions);
   brw_init_compute_functions(functions);
   brw_init_conditional_render_functions(functions);

   functions->GenerateMipmap = brw_generate_mipmap;

   functions->QueryInternalFormat = brw_query_internal_format;

   /* Transform feedback: creation/deletion hooks are shared, while the
    * begin/end/pause/resume hooks come in three flavours.  Only the two
    * non-hsw flavours also install GetTransformFeedbackVertexCount.
    */
   functions->NewTransformFeedback = brw_new_transform_feedback;
   functions->DeleteTransformFeedback = brw_delete_transform_feedback;
   if (can_do_mi_math_and_lrr(brw->screen)) {
      functions->BeginTransformFeedback = hsw_begin_transform_feedback;
      functions->EndTransformFeedback = hsw_end_transform_feedback;
      functions->PauseTransformFeedback = hsw_pause_transform_feedback;
      functions->ResumeTransformFeedback = hsw_resume_transform_feedback;
   } else if (devinfo->gen >= 7) {
      functions->BeginTransformFeedback = gen7_begin_transform_feedback;
      functions->EndTransformFeedback = gen7_end_transform_feedback;
      functions->PauseTransformFeedback = gen7_pause_transform_feedback;
      functions->ResumeTransformFeedback = gen7_resume_transform_feedback;
      functions->GetTransformFeedbackVertexCount =
         brw_get_transform_feedback_vertex_count;
   } else {
      functions->BeginTransformFeedback = brw_begin_transform_feedback;
      functions->EndTransformFeedback = brw_end_transform_feedback;
      functions->PauseTransformFeedback = brw_pause_transform_feedback;
      functions->ResumeTransformFeedback = brw_resume_transform_feedback;
      functions->GetTransformFeedbackVertexCount =
         brw_get_transform_feedback_vertex_count;
   }

   /* GetSamplePosition is only overridden on gen6 and later. */
   if (devinfo->gen >= 6)
      functions->GetSamplePosition = gen6_get_sample_position;

   /* GL_ARB_get_program_binary */
   brw_program_binary_init(brw->screen->deviceID);
   functions->GetProgramBinaryDriverSHA1 = brw_get_program_binary_driver_sha1;
   functions->ProgramBinarySerializeDriverBlob = brw_serialize_program_binary;
   functions->ProgramBinaryDeserializeDriverBlob =
      brw_deserialize_program_binary;

   /* The shader cache serialization hook is only installed when the screen
    * has a disk cache.
    */
   if (brw->screen->disk_cache) {
      functions->ShaderCacheSerializeDriverBlob = brw_program_serialize_nir;
   }

   functions->SetBackgroundContext = brw_set_background_context;
}
418 
419 static void
brw_initialize_spirv_supported_capabilities(struct brw_context * brw)420 brw_initialize_spirv_supported_capabilities(struct brw_context *brw)
421 {
422    const struct gen_device_info *devinfo = &brw->screen->devinfo;
423    struct gl_context *ctx = &brw->ctx;
424 
425    /* The following SPIR-V capabilities are only supported on gen7+. In theory
426     * you should enable the extension only on gen7+, but just in case let's
427     * assert it.
428     */
429    assert(devinfo->gen >= 7);
430 
431    ctx->Const.SpirVCapabilities.atomic_storage = devinfo->gen >= 7;
432    ctx->Const.SpirVCapabilities.draw_parameters = true;
433    ctx->Const.SpirVCapabilities.float64 = devinfo->gen >= 8;
434    ctx->Const.SpirVCapabilities.geometry_streams = devinfo->gen >= 7;
435    ctx->Const.SpirVCapabilities.image_write_without_format = true;
436    ctx->Const.SpirVCapabilities.int64 = devinfo->gen >= 8;
437    ctx->Const.SpirVCapabilities.tessellation = true;
438    ctx->Const.SpirVCapabilities.transform_feedback = devinfo->gen >= 7;
439    ctx->Const.SpirVCapabilities.variable_pointers = true;
440    ctx->Const.SpirVCapabilities.integer_functions2 = devinfo->gen >= 8;
441 }
442 
/**
 * Set the driver-specific limits and behavior flags in brw->ctx.Const.
 *
 * The values are chosen from the device generation (devinfo), the
 * compiler's per-stage configuration, the list of supported MSAA modes and
 * the "clamp_max_samples" driconf option.  Several assignments read values
 * that are already present in ctx->Const (for example
 * MaxComputeWorkGroupSize, MaxUniformComponents, MaxEnvParams and
 * MaxViewportWidth), so the result depends on what was stored there before
 * this function runs.
 */
static void
brw_initialize_context_constants(struct brw_context *brw)
{
   const struct gen_device_info *devinfo = &brw->screen->devinfo;
   struct gl_context *ctx = &brw->ctx;
   const struct brw_compiler *compiler = brw->screen->compiler;

   /* Which shader stages are counted for the "combined" limits below.
    *
    * NOTE(review): the compute entry reads
    * ctx->Const.MaxComputeWorkGroupSize[0], which is not written in this
    * function - presumably it must be set up before this runs; confirm
    * against the caller.
    */
   const bool stage_exists[MESA_SHADER_STAGES] = {
      [MESA_SHADER_VERTEX] = true,
      [MESA_SHADER_TESS_CTRL] = devinfo->gen >= 7,
      [MESA_SHADER_TESS_EVAL] = devinfo->gen >= 7,
      [MESA_SHADER_GEOMETRY] = devinfo->gen >= 6,
      [MESA_SHADER_FRAGMENT] = true,
      [MESA_SHADER_COMPUTE] =
         (_mesa_is_desktop_gl(ctx) &&
          ctx->Const.MaxComputeWorkGroupSize[0] >= 1024) ||
         (ctx->API == API_OPENGLES2 &&
          ctx->Const.MaxComputeWorkGroupSize[0] >= 128),
   };

   /* Number of existing stages; used to scale the combined limits. */
   unsigned num_stages = 0;
   for (int i = 0; i < MESA_SHADER_STAGES; i++) {
      if (stage_exists[i])
         num_stages++;
   }

   /* Per-stage sampler count: BRW_MAX_TEX_UNIT on gen8+ and Haswell, 16
    * otherwise.
    */
   unsigned max_samplers =
      devinfo->gen >= 8 || devinfo->is_haswell ? BRW_MAX_TEX_UNIT : 16;

   ctx->Const.MaxDualSourceDrawBuffers = 1;
   ctx->Const.MaxDrawBuffers = BRW_MAX_DRAW_BUFFERS;
   ctx->Const.MaxCombinedShaderOutputResources =
      MAX_IMAGE_UNITS + BRW_MAX_DRAW_BUFFERS;

   /* The timestamp register we can read for glGetTimestamp() is
    * sometimes only 32 bits, before scaling to nanoseconds (depending
    * on kernel).
    *
    * Once scaled to nanoseconds the timestamp would roll over at a
    * non-power-of-two, so an application couldn't use
    * GL_QUERY_COUNTER_BITS to handle rollover correctly.  Instead, we
    * report 36 bits and truncate at that (rolling over 5 times as
    * often as the HW counter), and when the 32-bit counter rolls
    * over, it happens to also be at a rollover in the reported value
    * from near (1<<36) to 0.
    *
    * The low 32 bits rolls over in ~343 seconds.  Our 36-bit result
    * rolls over every ~69 seconds.
    */
   ctx->Const.QueryCounterBits.Timestamp = 36;

   ctx->Const.MaxTextureCoordUnits = 8; /* Mesa limit */
   ctx->Const.MaxImageUnits = MAX_IMAGE_UNITS;
   if (devinfo->gen >= 7) {
      ctx->Const.MaxRenderbufferSize = 16384;
      ctx->Const.MaxTextureSize = 16384;
      ctx->Const.MaxCubeTextureLevels = 15; /* 16384 */
   } else {
      ctx->Const.MaxRenderbufferSize = 8192;
      ctx->Const.MaxTextureSize = 8192;
      ctx->Const.MaxCubeTextureLevels = 14; /* 8192 */
   }
   ctx->Const.Max3DTextureLevels = 12; /* 2048 */
   ctx->Const.MaxArrayTextureLayers = devinfo->gen >= 7 ? 2048 : 512;
   ctx->Const.MaxTextureMbytes = 1536;
   ctx->Const.MaxTextureRectSize = devinfo->gen >= 7 ? 16384 : 8192;
   ctx->Const.MaxTextureMaxAnisotropy = 16.0;
   ctx->Const.MaxTextureLodBias = 15.0;
   ctx->Const.StripTextureBorder = true;
   /* Texture gather limits are only written for gen6 and later; earlier
    * generations keep whatever values are already stored.
    */
   if (devinfo->gen >= 7) {
      ctx->Const.MaxProgramTextureGatherComponents = 4;
      ctx->Const.MinProgramTextureGatherOffset = -32;
      ctx->Const.MaxProgramTextureGatherOffset = 31;
   } else if (devinfo->gen == 6) {
      ctx->Const.MaxProgramTextureGatherComponents = 1;
      ctx->Const.MinProgramTextureGatherOffset = -8;
      ctx->Const.MaxProgramTextureGatherOffset = 7;
   }

   ctx->Const.MaxUniformBlockSize = 65536;

   /* Per-stage program limits; stages that do not exist are left alone. */
   for (int i = 0; i < MESA_SHADER_STAGES; i++) {
      struct gl_program_constants *prog = &ctx->Const.Program[i];

      if (!stage_exists[i])
         continue;

      prog->MaxTextureImageUnits = max_samplers;

      prog->MaxUniformBlocks = BRW_MAX_UBO;
      /* Uses the MaxUniformComponents value already stored for the stage,
       * plus MaxUniformBlockSize / 4 for each of the stage's uniform
       * blocks.
       */
      prog->MaxCombinedUniformComponents =
         prog->MaxUniformComponents +
         ctx->Const.MaxUniformBlockSize / 4 * prog->MaxUniformBlocks;

      prog->MaxAtomicCounters = MAX_ATOMIC_COUNTERS;
      prog->MaxAtomicBuffers = BRW_MAX_ABO;
      /* Image uniforms are only exposed for stages the compiler marks as
       * scalar.
       */
      prog->MaxImageUniforms = compiler->scalar_stage[i] ? BRW_MAX_IMAGES : 0;
      prog->MaxShaderStorageBlocks = BRW_MAX_SSBO;
   }

   ctx->Const.MaxTextureUnits =
      MIN2(ctx->Const.MaxTextureCoordUnits,
           ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits);

   /* Combined limits: the per-stage limit times the number of stages. */
   ctx->Const.MaxUniformBufferBindings = num_stages * BRW_MAX_UBO;
   ctx->Const.MaxCombinedUniformBlocks = num_stages * BRW_MAX_UBO;
   ctx->Const.MaxCombinedAtomicBuffers = num_stages * BRW_MAX_ABO;
   ctx->Const.MaxCombinedShaderStorageBlocks = num_stages * BRW_MAX_SSBO;
   ctx->Const.MaxShaderStorageBufferBindings = num_stages * BRW_MAX_SSBO;
   ctx->Const.MaxCombinedTextureImageUnits = num_stages * max_samplers;
   ctx->Const.MaxCombinedImageUniforms = num_stages * BRW_MAX_IMAGES;


   /* Hardware only supports a limited number of transform feedback buffers.
    * So we need to override the Mesa default (which is based only on software
    * limits).
    */
   ctx->Const.MaxTransformFeedbackBuffers = BRW_MAX_SOL_BUFFERS;

   /* On Gen6, in the worst case, we use up one binding table entry per
    * transform feedback component (see comments above the definition of
    * BRW_MAX_SOL_BINDINGS, in brw_context.h), so we need to advertise a value
    * for MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS equal to
    * BRW_MAX_SOL_BINDINGS.
    *
    * In "separate components" mode, we need to divide this value by
    * BRW_MAX_SOL_BUFFERS, so that the total number of binding table entries
    * used up by all buffers will not exceed BRW_MAX_SOL_BINDINGS.
    */
   ctx->Const.MaxTransformFeedbackInterleavedComponents = BRW_MAX_SOL_BINDINGS;
   ctx->Const.MaxTransformFeedbackSeparateComponents =
      BRW_MAX_SOL_BINDINGS / BRW_MAX_SOL_BUFFERS;

   ctx->Const.AlwaysUseGetTransformFeedbackVertexCount =
      !can_do_mi_math_and_lrr(brw->screen);

   /* Maximum sample count: the first entry of the supported-mode list when
    * "clamp_max_samples" is negative, otherwise the first entry that does
    * not exceed the clamp (0 when there is none).
    *
    * NOTE(review): this assumes msaa_modes is ordered largest-first and
    * terminated by 0 - confirm against intel_supported_msaa_modes().
    */
   int max_samples;
   const int *msaa_modes = intel_supported_msaa_modes(brw->screen);
   const int clamp_max_samples =
      driQueryOptioni(&brw->optionCache, "clamp_max_samples");

   if (clamp_max_samples < 0) {
      max_samples = msaa_modes[0];
   } else {
      /* Select the largest supported MSAA mode that does not exceed
       * clamp_max_samples.
       */
      max_samples = 0;
      for (int i = 0; msaa_modes[i] != 0; ++i) {
         if (msaa_modes[i] <= clamp_max_samples) {
            max_samples = msaa_modes[i];
            break;
         }
      }
   }

   ctx->Const.MaxSamples = max_samples;
   ctx->Const.MaxColorTextureSamples = max_samples;
   ctx->Const.MaxDepthTextureSamples = max_samples;
   ctx->Const.MaxIntegerSamples = max_samples;
   ctx->Const.MaxImageSamples = 0;

   ctx->Const.MinLineWidth = 1.0;
   ctx->Const.MinLineWidthAA = 1.0;
   if (devinfo->gen >= 6) {
      ctx->Const.MaxLineWidth = 7.375;
      ctx->Const.MaxLineWidthAA = 7.375;
      ctx->Const.LineWidthGranularity = 0.125;
   } else {
      ctx->Const.MaxLineWidth = 7.0;
      ctx->Const.MaxLineWidthAA = 7.0;
      ctx->Const.LineWidthGranularity = 0.5;
   }

   /* For non-antialiased lines, we have to round the line width to the
    * nearest whole number. Make sure that we don't advertise a line
    * width that, when rounded, will be beyond the actual hardware
    * maximum.
    */
   assert(roundf(ctx->Const.MaxLineWidth) <= ctx->Const.MaxLineWidth);

   ctx->Const.MinPointSize = 1.0;
   ctx->Const.MinPointSizeAA = 1.0;
   ctx->Const.MaxPointSize = 255.0;
   ctx->Const.MaxPointSizeAA = 255.0;
   ctx->Const.PointSizeGranularity = 1.0;

   /* MaxClipPlanes is only overridden on gen5+ and G4X. */
   if (devinfo->gen >= 5 || devinfo->is_g4x)
      ctx->Const.MaxClipPlanes = 8;

   ctx->Const.GLSLFragCoordIsSysVal = true;
   ctx->Const.GLSLFrontFacingIsSysVal = true;
   ctx->Const.GLSLTessLevelsAsInputs = true;
   ctx->Const.PrimitiveRestartForPatches = true;

   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeInstructions = 16 * 1024;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxAluInstructions = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxTexInstructions = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxTexIndirections = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeAluInstructions = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeTexInstructions = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeTexIndirections = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeAttribs = 16;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeTemps = 256;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeAddressRegs = 1;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeParameters = 1024;
   /* Clamp the already-stored MaxEnvParams to MaxNativeParameters. */
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxEnvParams =
      MIN2(ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeParameters,
	   ctx->Const.Program[MESA_SHADER_VERTEX].MaxEnvParams);

   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeInstructions = 1024;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeAluInstructions = 1024;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeTexInstructions = 1024;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeTexIndirections = 1024;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeAttribs = 12;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeTemps = 256;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeAddressRegs = 0;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeParameters = 1024;
   /* Clamp the already-stored MaxEnvParams to MaxNativeParameters. */
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxEnvParams =
      MIN2(ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeParameters,
	   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxEnvParams);

   /* Fragment shaders use real, 32-bit twos-complement integers for all
    * integer types.
    */
   ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt.RangeMin = 31;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt.RangeMax = 30;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt.Precision = 0;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].HighInt = ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MediumInt = ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt;

   /* The vertex stage gets the same integer precision description. */
   ctx->Const.Program[MESA_SHADER_VERTEX].LowInt.RangeMin = 31;
   ctx->Const.Program[MESA_SHADER_VERTEX].LowInt.RangeMax = 30;
   ctx->Const.Program[MESA_SHADER_VERTEX].LowInt.Precision = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].HighInt = ctx->Const.Program[MESA_SHADER_VERTEX].LowInt;
   ctx->Const.Program[MESA_SHADER_VERTEX].MediumInt = ctx->Const.Program[MESA_SHADER_VERTEX].LowInt;

   /* Gen6 converts quads to polygon in beginning of 3D pipeline,
    * but we're not sure how it's actually done for vertex order,
    * that affect provoking vertex decision. Always use last vertex
    * convention for quad primitive which works as expected for now.
    */
   if (devinfo->gen >= 6)
      ctx->Const.QuadsFollowProvokingVertexConvention = false;

   ctx->Const.NativeIntegers = true;

   /* Regarding the CMP instruction, the Ivybridge PRM says:
    *
    *   "For each enabled channel 0b or 1b is assigned to the appropriate flag
    *    bit and 0/all zeros or all ones (e.g, byte 0xFF, word 0xFFFF, DWord
    *    0xFFFFFFFF) is assigned to dst."
    *
    * but PRMs for earlier generations say
    *
    *   "In dword format, one GRF may store up to 8 results. When the register
    *    is used later as a vector of Booleans, as only LSB at each channel
    *    contains meaning [sic] data, software should make sure all higher bits
    *    are masked out (e.g. by 'and-ing' an [sic] 0x01 constant)."
    *
    * We select the representation of a true boolean uniform to be ~0, and fix
    * the results of Gen <= 5 CMP instruction's with -(result & 1).
    */
   ctx->Const.UniformBooleanTrue = ~0;

   /* From the gen4 PRM, volume 4 page 127:
    *
    *     "For SURFTYPE_BUFFER non-rendertarget surfaces, this field specifies
    *      the base address of the first element of the surface, computed in
    *      software by adding the surface base address to the byte offset of
    *      the element in the buffer."
    *
    * However, unaligned accesses are slower, so enforce buffer alignment.
    *
    * In order to push UBO data, 3DSTATE_CONSTANT_XS imposes an additional
    * restriction: the start of the buffer needs to be 32B aligned.
    */
   ctx->Const.UniformBufferOffsetAlignment = 32;

   /* ShaderStorageBufferOffsetAlignment should be a cacheline (64 bytes) so
    * that we can safely have the CPU and GPU writing the same SSBO on
    * non-cachecoherent systems (our Atom CPUs). With UBOs, the GPU never
    * writes, so there's no problem. For an SSBO, the GPU and the CPU can
    * be updating disjoint regions of the buffer simultaneously and that will
    * break if the regions overlap the same cacheline.
    */
   ctx->Const.ShaderStorageBufferOffsetAlignment = 64;
   ctx->Const.TextureBufferOffsetAlignment = 16;
   ctx->Const.MaxTextureBufferSize = 128 * 1024 * 1024;

   /* Varying and per-stage input/output component limits are only
    * overridden on gen6 and later.
    */
   if (devinfo->gen >= 6) {
      ctx->Const.MaxVarying = 32;
      ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents = 128;
      ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxInputComponents =
         compiler->scalar_stage[MESA_SHADER_GEOMETRY] ? 128 : 64;
      ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxOutputComponents = 128;
      ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxInputComponents = 128;
      ctx->Const.Program[MESA_SHADER_TESS_CTRL].MaxInputComponents = 128;
      ctx->Const.Program[MESA_SHADER_TESS_CTRL].MaxOutputComponents = 128;
      ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxInputComponents = 128;
      ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxOutputComponents = 128;
   }

   /* We want the GLSL compiler to emit code that uses condition codes */
   for (int i = 0; i < MESA_SHADER_STAGES; i++) {
      ctx->Const.ShaderCompilerOptions[i] =
         brw->screen->compiler->glsl_compiler_options[i];
   }

   if (devinfo->gen >= 7) {
      ctx->Const.MaxViewportWidth = 32768;
      ctx->Const.MaxViewportHeight = 32768;
   }

   /* ARB_viewport_array, OES_viewport_array */
   if (devinfo->gen >= 6) {
      ctx->Const.MaxViewports = GEN6_NUM_VIEWPORTS;
      ctx->Const.ViewportSubpixelBits = 8;

      /* Cast to float before negating because MaxViewportWidth is unsigned.
       */
      ctx->Const.ViewportBounds.Min = -(float)ctx->Const.MaxViewportWidth;
      ctx->Const.ViewportBounds.Max = ctx->Const.MaxViewportWidth;
   }

   /* ARB_gpu_shader5 */
   if (devinfo->gen >= 7)
      ctx->Const.MaxVertexStreams = MIN2(4, MAX_VERTEX_STREAMS);

   /* ARB_framebuffer_no_attachments */
   ctx->Const.MaxFramebufferWidth = 16384;
   ctx->Const.MaxFramebufferHeight = 16384;
   ctx->Const.MaxFramebufferLayers = ctx->Const.MaxArrayTextureLayers;
   ctx->Const.MaxFramebufferSamples = max_samples;

   /* OES_primitive_bounding_box */
   ctx->Const.NoPrimitiveBoundingBoxOutput = true;

   /* TODO: We should be able to use STD430 packing by default on all hardware
    * but some piglit tests [1] currently fail on SNB when this is enabled.
    * The problem is the messages we're using for doing uniform pulls
    * in the vec4 back-end on SNB is the OWORD block load instruction, which
    * takes its offset in units of OWORDS (16 bytes).  On IVB+, we use the
    * sampler which doesn't have these restrictions.
    *
    * In the scalar back-end, we use the sampler for dynamic uniform loads and
    * pull an entire cache line at a time for constant offset loads both of
    * which support almost any alignment.
    *
    * [1] glsl-1.40/uniform_buffer/vs-float-array-variable-index.shader_test
    */
   if (devinfo->gen >= 7)
      ctx->Const.UseSTD430AsDefaultPacking = true;

   /* Only enabled when the context was not created with the debug flag. */
   if (!(ctx->Const.ContextFlags & GL_CONTEXT_FLAG_DEBUG_BIT))
      ctx->Const.AllowMappedBuffersDuringExecution = true;

   /* GL_ARB_get_program_binary */
   ctx->Const.NumProgramBinaryFormats = 1;
}
803 
804 static void
brw_initialize_cs_context_constants(struct brw_context * brw)805 brw_initialize_cs_context_constants(struct brw_context *brw)
806 {
807    struct gl_context *ctx = &brw->ctx;
808    const struct intel_screen *screen = brw->screen;
809    struct gen_device_info *devinfo = &brw->screen->devinfo;
810 
811    /* FINISHME: Do this for all platforms that the kernel supports */
812    if (devinfo->is_cherryview &&
813        screen->subslice_total > 0 && screen->eu_total > 0) {
814       /* Logical CS threads = EUs per subslice * 7 threads per EU */
815       uint32_t max_cs_threads = screen->eu_total / screen->subslice_total * 7;
816 
817       /* Fuse configurations may give more threads than expected, never less. */
818       if (max_cs_threads > devinfo->max_cs_threads)
819          devinfo->max_cs_threads = max_cs_threads;
820    }
821 
822    /* Maximum number of scalar compute shader invocations that can be run in
823     * parallel in the same subslice assuming SIMD32 dispatch.
824     *
825     * We don't advertise more than 64 threads, because we are limited to 64 by
826     * our usage of thread_width_max in the gpgpu walker command. This only
827     * currently impacts Haswell, which otherwise might be able to advertise 70
828     * threads. With SIMD32 and 64 threads, Haswell still provides twice the
829     * required the number of invocation needed for ARB_compute_shader.
830     */
831    const unsigned max_threads = MIN2(64, devinfo->max_cs_threads);
832    const uint32_t max_invocations = 32 * max_threads;
833    ctx->Const.MaxComputeWorkGroupSize[0] = max_invocations;
834    ctx->Const.MaxComputeWorkGroupSize[1] = max_invocations;
835    ctx->Const.MaxComputeWorkGroupSize[2] = max_invocations;
836    ctx->Const.MaxComputeWorkGroupInvocations = max_invocations;
837    ctx->Const.MaxComputeSharedMemorySize = 64 * 1024;
838 
839    /* Constants used for ARB_compute_variable_group_size. */
840    if (devinfo->gen >= 7) {
841       assert(max_invocations >= 512);
842       ctx->Const.MaxComputeVariableGroupSize[0] = max_invocations;
843       ctx->Const.MaxComputeVariableGroupSize[1] = max_invocations;
844       ctx->Const.MaxComputeVariableGroupSize[2] = max_invocations;
845       ctx->Const.MaxComputeVariableGroupInvocations = max_invocations;
846    }
847 }
848 
849 /**
850  * Process driconf (drirc) options, setting appropriate context flags.
851  *
852  * intelInitExtensions still pokes at optionCache directly, in order to
853  * avoid advertising various extensions.  No flags are set, so it makes
854  * sense to continue doing that there.
855  */
856 static void
brw_process_driconf_options(struct brw_context * brw)857 brw_process_driconf_options(struct brw_context *brw)
858 {
859    const struct gen_device_info *devinfo = &brw->screen->devinfo;
860    struct gl_context *ctx = &brw->ctx;
861 
862    driOptionCache *options = &brw->optionCache;
863    driParseConfigFiles(options, &brw->screen->optionCache,
864                        brw->driContext->driScreenPriv->myNum,
865                        "i965", NULL, NULL, 0, NULL, 0);
866 
867    if (INTEL_DEBUG & DEBUG_NO_HIZ) {
868        brw->has_hiz = false;
869        /* On gen6, you can only do separate stencil with HIZ. */
870        if (devinfo->gen == 6)
871           brw->has_separate_stencil = false;
872    }
873 
874    if (driQueryOptionb(options, "mesa_no_error"))
875       ctx->Const.ContextFlags |= GL_CONTEXT_FLAG_NO_ERROR_BIT_KHR;
876 
877    if (driQueryOptionb(options, "always_flush_batch")) {
878       fprintf(stderr, "flushing batchbuffer before/after each draw call\n");
879       brw->always_flush_batch = true;
880    }
881 
882    if (driQueryOptionb(options, "always_flush_cache")) {
883       fprintf(stderr, "flushing GPU caches before/after each draw call\n");
884       brw->always_flush_cache = true;
885    }
886 
887    if (driQueryOptionb(options, "disable_throttling")) {
888       fprintf(stderr, "disabling flush throttling\n");
889       brw->disable_throttling = true;
890    }
891 
892    brw->precompile = driQueryOptionb(&brw->optionCache, "shader_precompile");
893 
894    if (driQueryOptionb(&brw->optionCache, "precise_trig"))
895       brw->screen->compiler->precise_trig = true;
896 
897    ctx->Const.ForceGLSLExtensionsWarn =
898       driQueryOptionb(options, "force_glsl_extensions_warn");
899 
900    ctx->Const.ForceGLSLVersion =
901       driQueryOptioni(options, "force_glsl_version");
902 
903    ctx->Const.DisableGLSLLineContinuations =
904       driQueryOptionb(options, "disable_glsl_line_continuations");
905 
906    ctx->Const.AllowGLSLExtensionDirectiveMidShader =
907       driQueryOptionb(options, "allow_glsl_extension_directive_midshader");
908 
909    ctx->Const.AllowGLSLBuiltinVariableRedeclaration =
910       driQueryOptionb(options, "allow_glsl_builtin_variable_redeclaration");
911 
912    ctx->Const.AllowHigherCompatVersion =
913       driQueryOptionb(options, "allow_higher_compat_version");
914 
915    ctx->Const.ForceGLSLAbsSqrt =
916       driQueryOptionb(options, "force_glsl_abs_sqrt");
917 
918    ctx->Const.GLSLZeroInit = driQueryOptionb(options, "glsl_zero_init") ? 1 : 0;
919 
920    brw->dual_color_blend_by_location =
921       driQueryOptionb(options, "dual_color_blend_by_location");
922 
923    ctx->Const.AllowGLSLCrossStageInterpolationMismatch =
924       driQueryOptionb(options, "allow_glsl_cross_stage_interpolation_mismatch");
925 
926    char *vendor_str = driQueryOptionstr(options, "force_gl_vendor");
927    /* not an empty string */
928    if (*vendor_str)
929       ctx->Const.VendorOverride = vendor_str;
930 
931    ctx->Const.dri_config_options_sha1 = ralloc_array(brw, unsigned char, 20);
932    driComputeOptionsSha1(&brw->screen->optionCache,
933                          ctx->Const.dri_config_options_sha1);
934 }
935 
936 GLboolean
brwCreateContext(gl_api api,const struct gl_config * mesaVis,__DRIcontext * driContextPriv,const struct __DriverContextConfig * ctx_config,unsigned * dri_ctx_error,void * sharedContextPrivate)937 brwCreateContext(gl_api api,
938                  const struct gl_config *mesaVis,
939                  __DRIcontext *driContextPriv,
940                  const struct __DriverContextConfig *ctx_config,
941                  unsigned *dri_ctx_error,
942                  void *sharedContextPrivate)
943 {
944    struct gl_context *shareCtx = (struct gl_context *) sharedContextPrivate;
945    struct intel_screen *screen = driContextPriv->driScreenPriv->driverPrivate;
946    const struct gen_device_info *devinfo = &screen->devinfo;
947    struct dd_function_table functions;
948 
949    /* Only allow the __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS flag if the kernel
950     * provides us with context reset notifications.
951     */
952    uint32_t allowed_flags = __DRI_CTX_FLAG_DEBUG |
953                             __DRI_CTX_FLAG_FORWARD_COMPATIBLE |
954                             __DRI_CTX_FLAG_NO_ERROR;
955 
956    if (screen->has_context_reset_notification)
957       allowed_flags |= __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS;
958 
959    if (ctx_config->flags & ~allowed_flags) {
960       *dri_ctx_error = __DRI_CTX_ERROR_UNKNOWN_FLAG;
961       return false;
962    }
963 
964    if (ctx_config->attribute_mask &
965        ~(__DRIVER_CONTEXT_ATTRIB_RESET_STRATEGY |
966          __DRIVER_CONTEXT_ATTRIB_PRIORITY)) {
967       *dri_ctx_error = __DRI_CTX_ERROR_UNKNOWN_ATTRIBUTE;
968       return false;
969    }
970 
971    bool notify_reset =
972       ((ctx_config->attribute_mask & __DRIVER_CONTEXT_ATTRIB_RESET_STRATEGY) &&
973        ctx_config->reset_strategy != __DRI_CTX_RESET_NO_NOTIFICATION);
974 
975    struct brw_context *brw = rzalloc(NULL, struct brw_context);
976    if (!brw) {
977       fprintf(stderr, "%s: failed to alloc context\n", __func__);
978       *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
979       return false;
980    }
981    brw->perf_ctx = gen_perf_new_context(brw);
982 
983    driContextPriv->driverPrivate = brw;
984    brw->driContext = driContextPriv;
985    brw->screen = screen;
986    brw->bufmgr = screen->bufmgr;
987 
988    brw->has_hiz = devinfo->has_hiz_and_separate_stencil;
989    brw->has_separate_stencil = devinfo->has_hiz_and_separate_stencil;
990 
991    brw->has_swizzling = screen->hw_has_swizzling;
992 
993    brw->isl_dev = screen->isl_dev;
994 
995    brw->vs.base.stage = MESA_SHADER_VERTEX;
996    brw->tcs.base.stage = MESA_SHADER_TESS_CTRL;
997    brw->tes.base.stage = MESA_SHADER_TESS_EVAL;
998    brw->gs.base.stage = MESA_SHADER_GEOMETRY;
999    brw->wm.base.stage = MESA_SHADER_FRAGMENT;
1000    brw->cs.base.stage = MESA_SHADER_COMPUTE;
1001 
1002    brw_init_driver_functions(brw, &functions);
1003 
1004    if (notify_reset)
1005       functions.GetGraphicsResetStatus = brw_get_graphics_reset_status;
1006 
1007    brw_process_driconf_options(brw);
1008 
1009    if (api == API_OPENGL_CORE &&
1010        driQueryOptionb(&screen->optionCache, "force_compat_profile")) {
1011       api = API_OPENGL_COMPAT;
1012    }
1013 
1014    struct gl_context *ctx = &brw->ctx;
1015 
1016    if (!_mesa_initialize_context(ctx, api, mesaVis, shareCtx, &functions)) {
1017       *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
1018       fprintf(stderr, "%s: failed to init mesa context\n", __func__);
1019       intelDestroyContext(driContextPriv);
1020       return false;
1021    }
1022 
1023    driContextSetFlags(ctx, ctx_config->flags);
1024 
1025    /* Initialize the software rasterizer and helper modules.
1026     *
1027     * As of GL 3.1 core, the gen4+ driver doesn't need the swrast context for
1028     * software fallbacks (which we have to support on legacy GL to do weird
1029     * glDrawPixels(), glBitmap(), and other functions).
1030     */
1031    if (api != API_OPENGL_CORE && api != API_OPENGLES2) {
1032       _swrast_CreateContext(ctx);
1033    }
1034 
1035    _vbo_CreateContext(ctx, true);
1036    if (ctx->swrast_context) {
1037       _tnl_CreateContext(ctx);
1038       TNL_CONTEXT(ctx)->Driver.RunPipeline = _tnl_run_pipeline;
1039       _swsetup_CreateContext(ctx);
1040 
1041       /* Configure swrast to match hardware characteristics: */
1042       _swrast_allow_pixel_fog(ctx, false);
1043       _swrast_allow_vertex_fog(ctx, true);
1044    }
1045 
1046    _mesa_meta_init(ctx);
1047 
1048    if (INTEL_DEBUG & DEBUG_PERF)
1049       brw->perf_debug = true;
1050 
1051    brw_initialize_cs_context_constants(brw);
1052    brw_initialize_context_constants(brw);
1053 
1054    ctx->Const.ResetStrategy = notify_reset
1055       ? GL_LOSE_CONTEXT_ON_RESET_ARB : GL_NO_RESET_NOTIFICATION_ARB;
1056 
1057    /* Reinitialize the context point state.  It depends on ctx->Const values. */
1058    _mesa_init_point(ctx);
1059 
1060    intel_fbo_init(brw);
1061 
1062    intel_batchbuffer_init(brw);
1063 
1064    /* Create a new hardware context.  Using a hardware context means that
1065     * our GPU state will be saved/restored on context switch, allowing us
1066     * to assume that the GPU is in the same state we left it in.
1067     *
1068     * This is required for transform feedback buffer offsets, query objects,
1069     * and also allows us to reduce how much state we have to emit.
1070     */
1071    brw->hw_ctx = brw_create_hw_context(brw->bufmgr);
1072    if (!brw->hw_ctx && devinfo->gen >= 6) {
1073       fprintf(stderr, "Failed to create hardware context.\n");
1074       intelDestroyContext(driContextPriv);
1075       return false;
1076    }
1077 
1078    if (brw->hw_ctx) {
1079       int hw_priority = GEN_CONTEXT_MEDIUM_PRIORITY;
1080       if (ctx_config->attribute_mask & __DRIVER_CONTEXT_ATTRIB_PRIORITY) {
1081          switch (ctx_config->priority) {
1082          case __DRI_CTX_PRIORITY_LOW:
1083             hw_priority = GEN_CONTEXT_LOW_PRIORITY;
1084             break;
1085          case __DRI_CTX_PRIORITY_HIGH:
1086             hw_priority = GEN_CONTEXT_HIGH_PRIORITY;
1087             break;
1088          }
1089       }
1090       if (hw_priority != I915_CONTEXT_DEFAULT_PRIORITY &&
1091           brw_hw_context_set_priority(brw->bufmgr, brw->hw_ctx, hw_priority)) {
1092          fprintf(stderr,
1093 		 "Failed to set priority [%d:%d] for hardware context.\n",
1094                  ctx_config->priority, hw_priority);
1095          intelDestroyContext(driContextPriv);
1096          return false;
1097       }
1098    }
1099 
1100    if (brw_init_pipe_control(brw, devinfo)) {
1101       *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
1102       intelDestroyContext(driContextPriv);
1103       return false;
1104    }
1105 
1106    brw_upload_init(&brw->upload, brw->bufmgr, 65536);
1107 
1108    brw_init_state(brw);
1109 
1110    intelInitExtensions(ctx);
1111 
1112    brw_init_surface_formats(brw);
1113 
1114    brw_blorp_init(brw);
1115 
1116    brw->urb.size = devinfo->urb.size;
1117 
1118    if (devinfo->gen == 6)
1119       brw->urb.gs_present = false;
1120 
1121    brw->prim_restart.in_progress = false;
1122    brw->prim_restart.enable_cut_index = false;
1123    brw->gs.enabled = false;
1124    brw->clip.viewport_count = 1;
1125 
1126    brw->predicate.state = BRW_PREDICATE_STATE_RENDER;
1127 
1128    brw->max_gtt_map_object_size = screen->max_gtt_map_object_size;
1129 
1130    ctx->VertexProgram._MaintainTnlProgram = true;
1131    ctx->FragmentProgram._MaintainTexEnvProgram = true;
1132 
1133    brw_draw_init( brw );
1134 
1135    if ((ctx_config->flags & __DRI_CTX_FLAG_DEBUG) != 0) {
1136       /* Turn on some extra GL_ARB_debug_output generation. */
1137       brw->perf_debug = true;
1138    }
1139 
1140    if ((ctx_config->flags & __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS) != 0) {
1141       ctx->Const.ContextFlags |= GL_CONTEXT_FLAG_ROBUST_ACCESS_BIT_ARB;
1142       ctx->Const.RobustAccess = GL_TRUE;
1143    }
1144 
1145    if (INTEL_DEBUG & DEBUG_SHADER_TIME)
1146       brw_init_shader_time(brw);
1147 
1148    _mesa_override_extensions(ctx);
1149    _mesa_compute_version(ctx);
1150 
1151    /* GL_ARB_gl_spirv */
1152    if (ctx->Extensions.ARB_gl_spirv) {
1153       brw_initialize_spirv_supported_capabilities(brw);
1154 
1155       if (ctx->Extensions.ARB_spirv_extensions) {
1156          /* GL_ARB_spirv_extensions */
1157          ctx->Const.SpirVExtensions = MALLOC_STRUCT(spirv_supported_extensions);
1158          _mesa_fill_supported_spirv_extensions(ctx->Const.SpirVExtensions,
1159                                                &ctx->Const.SpirVCapabilities);
1160       }
1161    }
1162 
1163    _mesa_initialize_dispatch_tables(ctx);
1164    _mesa_initialize_vbo_vtxfmt(ctx);
1165 
1166    if (ctx->Extensions.INTEL_performance_query)
1167       brw_init_performance_queries(brw);
1168 
1169    brw->ctx.Cache = brw->screen->disk_cache;
1170 
1171    if (driContextPriv->driScreenPriv->dri2.backgroundCallable &&
1172        driQueryOptionb(&screen->optionCache, "mesa_glthread")) {
1173       /* Loader supports multithreading, and so do we. */
1174       _mesa_glthread_init(ctx);
1175    }
1176 
1177    return true;
1178 }
1179 
1180 void
intelDestroyContext(__DRIcontext * driContextPriv)1181 intelDestroyContext(__DRIcontext * driContextPriv)
1182 {
1183    struct brw_context *brw =
1184       (struct brw_context *) driContextPriv->driverPrivate;
1185    struct gl_context *ctx = &brw->ctx;
1186 
1187    GET_CURRENT_CONTEXT(curctx);
1188 
1189    if (curctx == NULL) {
1190       /* No current context, but we need one to release
1191        * renderbuffer surface when we release framebuffer.
1192        * So temporarily bind the context.
1193        */
1194       _mesa_make_current(ctx, NULL, NULL);
1195    }
1196 
1197    _mesa_glthread_destroy(&brw->ctx);
1198 
1199    _mesa_meta_free(&brw->ctx);
1200 
1201    if (INTEL_DEBUG & DEBUG_SHADER_TIME) {
1202       /* Force a report. */
1203       brw->shader_time.report_time = 0;
1204 
1205       brw_collect_and_report_shader_time(brw);
1206       brw_destroy_shader_time(brw);
1207    }
1208 
1209    blorp_finish(&brw->blorp);
1210 
1211    brw_destroy_state(brw);
1212    brw_draw_destroy(brw);
1213 
1214    brw_bo_unreference(brw->curbe.curbe_bo);
1215 
1216    brw_bo_unreference(brw->vs.base.scratch_bo);
1217    brw_bo_unreference(brw->tcs.base.scratch_bo);
1218    brw_bo_unreference(brw->tes.base.scratch_bo);
1219    brw_bo_unreference(brw->gs.base.scratch_bo);
1220    brw_bo_unreference(brw->wm.base.scratch_bo);
1221 
1222    brw_bo_unreference(brw->vs.base.push_const_bo);
1223    brw_bo_unreference(brw->tcs.base.push_const_bo);
1224    brw_bo_unreference(brw->tes.base.push_const_bo);
1225    brw_bo_unreference(brw->gs.base.push_const_bo);
1226    brw_bo_unreference(brw->wm.base.push_const_bo);
1227 
1228    brw_destroy_hw_context(brw->bufmgr, brw->hw_ctx);
1229 
1230    if (ctx->swrast_context) {
1231       _swsetup_DestroyContext(&brw->ctx);
1232       _tnl_DestroyContext(&brw->ctx);
1233    }
1234    _vbo_DestroyContext(&brw->ctx);
1235 
1236    if (ctx->swrast_context)
1237       _swrast_DestroyContext(&brw->ctx);
1238 
1239    brw_fini_pipe_control(brw);
1240    intel_batchbuffer_free(&brw->batch);
1241 
1242    brw_bo_unreference(brw->throttle_batch[1]);
1243    brw_bo_unreference(brw->throttle_batch[0]);
1244    brw->throttle_batch[1] = NULL;
1245    brw->throttle_batch[0] = NULL;
1246 
1247    driDestroyOptionCache(&brw->optionCache);
1248 
1249    /* free the Mesa context */
1250    _mesa_free_context_data(&brw->ctx, true);
1251 
1252    ralloc_free(brw);
1253    driContextPriv->driverPrivate = NULL;
1254 }
1255 
1256 GLboolean
intelUnbindContext(__DRIcontext * driContextPriv)1257 intelUnbindContext(__DRIcontext * driContextPriv)
1258 {
1259    struct gl_context *ctx = driContextPriv->driverPrivate;
1260    _mesa_glthread_finish(ctx);
1261 
1262    /* Unset current context and dispath table */
1263    _mesa_make_current(NULL, NULL, NULL);
1264 
1265    return true;
1266 }
1267 
1268 /**
1269  * Fixes up the context for GLES23 with our default-to-sRGB-capable behavior
1270  * on window system framebuffers.
1271  *
1272  * Desktop GL is fairly reasonable in its handling of sRGB: You can ask if
1273  * your renderbuffer can do sRGB encode, and you can flip a switch that does
1274  * sRGB encode if the renderbuffer can handle it.  You can ask specifically
1275  * for a visual where you're guaranteed to be capable, but it turns out that
1276  * everyone just makes all their ARGB8888 visuals capable and doesn't offer
1277  * incapable ones, because there's no difference between the two in resources
1278  * used.  Applications thus get built that accidentally rely on the default
1279  * visual choice being sRGB, so we make ours sRGB capable.  Everything sounds
1280  * great...
1281  *
1282  * But for GLES2/3, they decided that it was silly to not turn on sRGB encode
1283  * for sRGB renderbuffers you made with the GL_EXT_texture_sRGB equivalent.
1284  * So they removed the enable knob and made it "if the renderbuffer is sRGB
1285  * capable, do sRGB encode".  Then, for your window system renderbuffers, you
1286  * can ask for sRGB visuals and get sRGB encode, or not ask for sRGB visuals
1287  * and get no sRGB encode (assuming that both kinds of visual are available).
1288  * Thus our choice to support sRGB by default on our visuals for desktop would
1289  * result in broken rendering of GLES apps that aren't expecting sRGB encode.
1290  *
1291  * Unfortunately, renderbuffer setup happens before a context is created.  So
1292  * in intel_screen.c we always set up sRGB, and here, if you're a GLES2/3
1293  * context (without an sRGB visual), we go turn that back off before anyone
1294  * finds out.
1295  */
1296 static void
intel_gles3_srgb_workaround(struct brw_context * brw,struct gl_framebuffer * fb)1297 intel_gles3_srgb_workaround(struct brw_context *brw,
1298                             struct gl_framebuffer *fb)
1299 {
1300    struct gl_context *ctx = &brw->ctx;
1301 
1302    if (_mesa_is_desktop_gl(ctx) || !fb->Visual.sRGBCapable)
1303       return;
1304 
1305    for (int i = 0; i < BUFFER_COUNT; i++) {
1306       struct gl_renderbuffer *rb = fb->Attachment[i].Renderbuffer;
1307 
1308       /* Check if sRGB was specifically asked for. */
1309       struct intel_renderbuffer *irb = intel_get_renderbuffer(fb, i);
1310       if (irb && irb->need_srgb)
1311          return;
1312 
1313       if (rb)
1314          rb->Format = _mesa_get_srgb_format_linear(rb->Format);
1315    }
1316    /* Disable sRGB from framebuffers that are not compatible. */
1317    fb->Visual.sRGBCapable = false;
1318 }
1319 
1320 GLboolean
intelMakeCurrent(__DRIcontext * driContextPriv,__DRIdrawable * driDrawPriv,__DRIdrawable * driReadPriv)1321 intelMakeCurrent(__DRIcontext * driContextPriv,
1322                  __DRIdrawable * driDrawPriv,
1323                  __DRIdrawable * driReadPriv)
1324 {
1325    struct brw_context *brw;
1326 
1327    if (driContextPriv)
1328       brw = (struct brw_context *) driContextPriv->driverPrivate;
1329    else
1330       brw = NULL;
1331 
1332    if (driContextPriv) {
1333       struct gl_context *ctx = &brw->ctx;
1334       struct gl_framebuffer *fb, *readFb;
1335 
1336       if (driDrawPriv == NULL) {
1337          fb = _mesa_get_incomplete_framebuffer();
1338       } else {
1339          fb = driDrawPriv->driverPrivate;
1340          driContextPriv->dri2.draw_stamp = driDrawPriv->dri2.stamp - 1;
1341       }
1342 
1343       if (driReadPriv == NULL) {
1344          readFb = _mesa_get_incomplete_framebuffer();
1345       } else {
1346          readFb = driReadPriv->driverPrivate;
1347          driContextPriv->dri2.read_stamp = driReadPriv->dri2.stamp - 1;
1348       }
1349 
1350       /* The sRGB workaround changes the renderbuffer's format. We must change
1351        * the format before the renderbuffer's miptree get's allocated, otherwise
1352        * the formats of the renderbuffer and its miptree will differ.
1353        */
1354       intel_gles3_srgb_workaround(brw, fb);
1355       intel_gles3_srgb_workaround(brw, readFb);
1356 
1357       /* If the context viewport hasn't been initialized, force a call out to
1358        * the loader to get buffers so we have a drawable size for the initial
1359        * viewport. */
1360       if (!brw->ctx.ViewportInitialized)
1361          intel_prepare_render(brw);
1362 
1363       _mesa_make_current(ctx, fb, readFb);
1364    } else {
1365       GET_CURRENT_CONTEXT(ctx);
1366       _mesa_glthread_finish(ctx);
1367       _mesa_make_current(NULL, NULL, NULL);
1368    }
1369 
1370    return true;
1371 }
1372 
1373 void
intel_resolve_for_dri2_flush(struct brw_context * brw,__DRIdrawable * drawable)1374 intel_resolve_for_dri2_flush(struct brw_context *brw,
1375                              __DRIdrawable *drawable)
1376 {
1377    const struct gen_device_info *devinfo = &brw->screen->devinfo;
1378 
1379    if (devinfo->gen < 6) {
1380       /* MSAA and fast color clear are not supported, so don't waste time
1381        * checking whether a resolve is needed.
1382        */
1383       return;
1384    }
1385 
1386    struct gl_framebuffer *fb = drawable->driverPrivate;
1387    struct intel_renderbuffer *rb;
1388 
1389    /* Usually, only the back buffer will need to be downsampled. However,
1390     * the front buffer will also need it if the user has rendered into it.
1391     */
1392    static const gl_buffer_index buffers[2] = {
1393          BUFFER_BACK_LEFT,
1394          BUFFER_FRONT_LEFT,
1395    };
1396 
1397    for (int i = 0; i < 2; ++i) {
1398       rb = intel_get_renderbuffer(fb, buffers[i]);
1399       if (rb == NULL || rb->mt == NULL)
1400          continue;
1401       if (rb->mt->surf.samples == 1) {
1402          assert(rb->mt_layer == 0 && rb->mt_level == 0 &&
1403                 rb->layer_count == 1);
1404          intel_miptree_prepare_external(brw, rb->mt);
1405       } else {
1406          intel_renderbuffer_downsample(brw, rb);
1407 
1408          /* Call prepare_external on the single-sample miptree to do any
1409           * needed resolves prior to handing it off to the window system.
1410           * This is needed in the case that rb->singlesample_mt is Y-tiled
1411           * with CCS_E enabled but without I915_FORMAT_MOD_Y_TILED_CCS_E.  In
1412           * this case, the MSAA resolve above will write compressed data into
1413           * rb->singlesample_mt.
1414           *
1415           * TODO: Some day, if we decide to care about the tiny performance
1416           * hit we're taking by doing the MSAA resolve and then a CCS resolve,
1417           * we could detect this case and just allocate the single-sampled
1418           * miptree without aux.  However, that would be a lot of plumbing and
1419           * this is a rather exotic case so it's not really worth it.
1420           */
1421          intel_miptree_prepare_external(brw, rb->singlesample_mt);
1422       }
1423    }
1424 }
1425 
/** Return the size of one pixel of the renderbuffer's format, in bits. */
static unsigned
intel_bits_per_pixel(const struct intel_renderbuffer *rb)
{
   const unsigned bytes_per_pixel =
      _mesa_get_format_bytes(intel_rb_format(rb));

   return bytes_per_pixel * 8;
}
1431 
1432 static void
1433 intel_query_dri2_buffers(struct brw_context *brw,
1434                          __DRIdrawable *drawable,
1435                          __DRIbuffer **buffers,
1436                          int *count);
1437 
1438 static void
1439 intel_process_dri2_buffer(struct brw_context *brw,
1440                           __DRIdrawable *drawable,
1441                           __DRIbuffer *buffer,
1442                           struct intel_renderbuffer *rb,
1443                           const char *buffer_name);
1444 
1445 static void
1446 intel_update_image_buffers(struct brw_context *brw, __DRIdrawable *drawable);
1447 
1448 static void
intel_update_dri2_buffers(struct brw_context * brw,__DRIdrawable * drawable)1449 intel_update_dri2_buffers(struct brw_context *brw, __DRIdrawable *drawable)
1450 {
1451    struct gl_framebuffer *fb = drawable->driverPrivate;
1452    struct intel_renderbuffer *rb;
1453    __DRIbuffer *buffers = NULL;
1454    int count;
1455    const char *region_name;
1456 
1457    /* Set this up front, so that in case our buffers get invalidated
1458     * while we're getting new buffers, we don't clobber the stamp and
1459     * thus ignore the invalidate. */
1460    drawable->lastStamp = drawable->dri2.stamp;
1461 
1462    if (INTEL_DEBUG & DEBUG_DRI)
1463       fprintf(stderr, "enter %s, drawable %p\n", __func__, drawable);
1464 
1465    intel_query_dri2_buffers(brw, drawable, &buffers, &count);
1466 
1467    if (buffers == NULL)
1468       return;
1469 
1470    for (int i = 0; i < count; i++) {
1471        switch (buffers[i].attachment) {
1472        case __DRI_BUFFER_FRONT_LEFT:
1473            rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
1474            region_name = "dri2 front buffer";
1475            break;
1476 
1477        case __DRI_BUFFER_FAKE_FRONT_LEFT:
1478            rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
1479            region_name = "dri2 fake front buffer";
1480            break;
1481 
1482        case __DRI_BUFFER_BACK_LEFT:
1483            rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);
1484            region_name = "dri2 back buffer";
1485            break;
1486 
1487        case __DRI_BUFFER_DEPTH:
1488        case __DRI_BUFFER_HIZ:
1489        case __DRI_BUFFER_DEPTH_STENCIL:
1490        case __DRI_BUFFER_STENCIL:
1491        case __DRI_BUFFER_ACCUM:
1492        default:
1493            fprintf(stderr,
1494                    "unhandled buffer attach event, attachment type %d\n",
1495                    buffers[i].attachment);
1496            return;
1497        }
1498 
1499        intel_process_dri2_buffer(brw, drawable, &buffers[i], rb, region_name);
1500    }
1501 
1502 }
1503 
1504 void
intel_update_renderbuffers(__DRIcontext * context,__DRIdrawable * drawable)1505 intel_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable)
1506 {
1507    struct brw_context *brw = context->driverPrivate;
1508    __DRIscreen *dri_screen = brw->screen->driScrnPriv;
1509 
1510    /* Set this up front, so that in case our buffers get invalidated
1511     * while we're getting new buffers, we don't clobber the stamp and
1512     * thus ignore the invalidate. */
1513    drawable->lastStamp = drawable->dri2.stamp;
1514 
1515    if (INTEL_DEBUG & DEBUG_DRI)
1516       fprintf(stderr, "enter %s, drawable %p\n", __func__, drawable);
1517 
1518    if (dri_screen->image.loader)
1519       intel_update_image_buffers(brw, drawable);
1520    else
1521       intel_update_dri2_buffers(brw, drawable);
1522 
1523    driUpdateFramebufferSize(&brw->ctx, drawable);
1524 }
1525 
1526 /**
1527  * intel_prepare_render should be called anywhere that curent read/drawbuffer
1528  * state is required.
1529  */
1530 void
intel_prepare_render(struct brw_context * brw)1531 intel_prepare_render(struct brw_context *brw)
1532 {
1533    struct gl_context *ctx = &brw->ctx;
1534    __DRIcontext *driContext = brw->driContext;
1535    __DRIdrawable *drawable;
1536 
1537    drawable = driContext->driDrawablePriv;
1538    if (drawable && drawable->dri2.stamp != driContext->dri2.draw_stamp) {
1539       if (drawable->lastStamp != drawable->dri2.stamp)
1540          intel_update_renderbuffers(driContext, drawable);
1541       driContext->dri2.draw_stamp = drawable->dri2.stamp;
1542    }
1543 
1544    drawable = driContext->driReadablePriv;
1545    if (drawable && drawable->dri2.stamp != driContext->dri2.read_stamp) {
1546       if (drawable->lastStamp != drawable->dri2.stamp)
1547          intel_update_renderbuffers(driContext, drawable);
1548       driContext->dri2.read_stamp = drawable->dri2.stamp;
1549    }
1550 
1551    /* If we're currently rendering to the front buffer, the rendering
1552     * that will happen next will probably dirty the front buffer.  So
1553     * mark it as dirty here.
1554     */
1555    if (_mesa_is_front_buffer_drawing(ctx->DrawBuffer) &&
1556        ctx->DrawBuffer != _mesa_get_incomplete_framebuffer()) {
1557       brw->front_buffer_dirty = true;
1558    }
1559 
1560    if (brw->is_shared_buffer_bound) {
1561       /* Subsequent rendering will probably dirty the shared buffer. */
1562       brw->is_shared_buffer_dirty = true;
1563    }
1564 }
1565 
1566 /**
1567  * \brief Query DRI2 to obtain a DRIdrawable's buffers.
1568  *
1569  * To determine which DRI buffers to request, examine the renderbuffers
1570  * attached to the drawable's framebuffer. Then request the buffers with
1571  * DRI2GetBuffers() or DRI2GetBuffersWithFormat().
1572  *
1573  * This is called from intel_update_renderbuffers().
1574  *
1575  * \param drawable      Drawable whose buffers are queried.
1576  * \param buffers       [out] List of buffers returned by DRI2 query.
1577  * \param buffer_count  [out] Number of buffers returned.
1578  *
1579  * \see intel_update_renderbuffers()
1580  * \see DRI2GetBuffers()
1581  * \see DRI2GetBuffersWithFormat()
1582  */
1583 static void
intel_query_dri2_buffers(struct brw_context * brw,__DRIdrawable * drawable,__DRIbuffer ** buffers,int * buffer_count)1584 intel_query_dri2_buffers(struct brw_context *brw,
1585                          __DRIdrawable *drawable,
1586                          __DRIbuffer **buffers,
1587                          int *buffer_count)
1588 {
1589    __DRIscreen *dri_screen = brw->screen->driScrnPriv;
1590    struct gl_framebuffer *fb = drawable->driverPrivate;
1591    int i = 0;
1592    unsigned attachments[8];
1593 
1594    struct intel_renderbuffer *front_rb;
1595    struct intel_renderbuffer *back_rb;
1596 
1597    front_rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
1598    back_rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);
1599 
1600    memset(attachments, 0, sizeof(attachments));
1601    if ((_mesa_is_front_buffer_drawing(fb) ||
1602         _mesa_is_front_buffer_reading(fb) ||
1603         !back_rb) && front_rb) {
1604       /* If a fake front buffer is in use, then querying for
1605        * __DRI_BUFFER_FRONT_LEFT will cause the server to copy the image from
1606        * the real front buffer to the fake front buffer.  So before doing the
1607        * query, we need to make sure all the pending drawing has landed in the
1608        * real front buffer.
1609        */
1610       intel_batchbuffer_flush(brw);
1611       intel_flush_front(&brw->ctx);
1612 
1613       attachments[i++] = __DRI_BUFFER_FRONT_LEFT;
1614       attachments[i++] = intel_bits_per_pixel(front_rb);
1615    } else if (front_rb && brw->front_buffer_dirty) {
1616       /* We have pending front buffer rendering, but we aren't querying for a
1617        * front buffer.  If the front buffer we have is a fake front buffer,
1618        * the X server is going to throw it away when it processes the query.
1619        * So before doing the query, make sure all the pending drawing has
1620        * landed in the real front buffer.
1621        */
1622       intel_batchbuffer_flush(brw);
1623       intel_flush_front(&brw->ctx);
1624    }
1625 
1626    if (back_rb) {
1627       attachments[i++] = __DRI_BUFFER_BACK_LEFT;
1628       attachments[i++] = intel_bits_per_pixel(back_rb);
1629    }
1630 
1631    assert(i <= ARRAY_SIZE(attachments));
1632 
1633    *buffers =
1634       dri_screen->dri2.loader->getBuffersWithFormat(drawable,
1635                                                     &drawable->w,
1636                                                     &drawable->h,
1637                                                     attachments, i / 2,
1638                                                     buffer_count,
1639                                                     drawable->loaderPrivate);
1640 }
1641 
1642 /**
1643  * \brief Assign a DRI buffer's DRM region to a renderbuffer.
1644  *
1645  * This is called from intel_update_renderbuffers().
1646  *
1647  * \par Note:
1648  *    DRI buffers whose attachment point is DRI2BufferStencil or
1649  *    DRI2BufferDepthStencil are handled as special cases.
1650  *
1651  * \param buffer_name is a human readable name, such as "dri2 front buffer",
1652  *        that is passed to brw_bo_gem_create_from_name().
1653  *
1654  * \see intel_update_renderbuffers()
1655  */
1656 static void
intel_process_dri2_buffer(struct brw_context * brw,__DRIdrawable * drawable,__DRIbuffer * buffer,struct intel_renderbuffer * rb,const char * buffer_name)1657 intel_process_dri2_buffer(struct brw_context *brw,
1658                           __DRIdrawable *drawable,
1659                           __DRIbuffer *buffer,
1660                           struct intel_renderbuffer *rb,
1661                           const char *buffer_name)
1662 {
1663    struct gl_framebuffer *fb = drawable->driverPrivate;
1664    struct brw_bo *bo;
1665 
1666    if (!rb)
1667       return;
1668 
1669    unsigned num_samples = rb->Base.Base.NumSamples;
1670 
1671    /* We try to avoid closing and reopening the same BO name, because the first
1672     * use of a mapping of the buffer involves a bunch of page faulting which is
1673     * moderately expensive.
1674     */
1675    struct intel_mipmap_tree *last_mt;
1676    if (num_samples == 0)
1677       last_mt = rb->mt;
1678    else
1679       last_mt = rb->singlesample_mt;
1680 
1681    uint32_t old_name = 0;
1682    if (last_mt) {
1683        /* The bo already has a name because the miptree was created by a
1684 	* previous call to intel_process_dri2_buffer(). If a bo already has a
1685 	* name, then brw_bo_flink() is a low-cost getter.  It does not
1686 	* create a new name.
1687 	*/
1688       brw_bo_flink(last_mt->bo, &old_name);
1689    }
1690 
1691    if (old_name == buffer->name)
1692       return;
1693 
1694    if (INTEL_DEBUG & DEBUG_DRI) {
1695       fprintf(stderr,
1696               "attaching buffer %d, at %d, cpp %d, pitch %d\n",
1697               buffer->name, buffer->attachment,
1698               buffer->cpp, buffer->pitch);
1699    }
1700 
1701    bo = brw_bo_gem_create_from_name(brw->bufmgr, buffer_name,
1702                                           buffer->name);
1703    if (!bo) {
1704       fprintf(stderr,
1705               "Failed to open BO for returned DRI2 buffer "
1706               "(%dx%d, %s, named %d).\n"
1707               "This is likely a bug in the X Server that will lead to a "
1708               "crash soon.\n",
1709               drawable->w, drawable->h, buffer_name, buffer->name);
1710       return;
1711    }
1712 
1713    uint32_t tiling, swizzle;
1714    brw_bo_get_tiling(bo, &tiling, &swizzle);
1715 
1716    struct intel_mipmap_tree *mt =
1717       intel_miptree_create_for_bo(brw,
1718                                   bo,
1719                                   intel_rb_format(rb),
1720                                   0,
1721                                   drawable->w,
1722                                   drawable->h,
1723                                   1,
1724                                   buffer->pitch,
1725                                   isl_tiling_from_i915_tiling(tiling),
1726                                   MIPTREE_CREATE_DEFAULT);
1727    if (!mt) {
1728       brw_bo_unreference(bo);
1729       return;
1730    }
1731 
1732    /* We got this BO from X11.  We cana't assume that we have coherent texture
1733     * access because X may suddenly decide to use it for scan-out which would
1734     * destroy coherency.
1735     */
1736    bo->cache_coherent = false;
1737 
1738    if (!intel_update_winsys_renderbuffer_miptree(brw, rb, mt,
1739                                                  drawable->w, drawable->h,
1740                                                  buffer->pitch)) {
1741       brw_bo_unreference(bo);
1742       intel_miptree_release(&mt);
1743       return;
1744    }
1745 
1746    if (_mesa_is_front_buffer_drawing(fb) &&
1747        (buffer->attachment == __DRI_BUFFER_FRONT_LEFT ||
1748         buffer->attachment == __DRI_BUFFER_FAKE_FRONT_LEFT) &&
1749        rb->Base.Base.NumSamples > 1) {
1750       intel_renderbuffer_upsample(brw, rb);
1751    }
1752 
1753    assert(rb->mt);
1754 
1755    brw_bo_unreference(bo);
1756 }
1757 
1758 /**
1759  * \brief Query DRI image loader to obtain a DRIdrawable's buffers.
1760  *
1761  * To determine which DRI buffers to request, examine the renderbuffers
1762  * attached to the drawable's framebuffer. Then request the buffers from
1763  * the image loader
1764  *
1765  * This is called from intel_update_renderbuffers().
1766  *
1767  * \param drawable      Drawable whose buffers are queried.
1768  * \param buffers       [out] List of buffers returned by DRI2 query.
1769  * \param buffer_count  [out] Number of buffers returned.
1770  *
1771  * \see intel_update_renderbuffers()
1772  */
1773 
1774 static void
intel_update_image_buffer(struct brw_context * intel,__DRIdrawable * drawable,struct intel_renderbuffer * rb,__DRIimage * buffer,enum __DRIimageBufferMask buffer_type)1775 intel_update_image_buffer(struct brw_context *intel,
1776                           __DRIdrawable *drawable,
1777                           struct intel_renderbuffer *rb,
1778                           __DRIimage *buffer,
1779                           enum __DRIimageBufferMask buffer_type)
1780 {
1781    struct gl_framebuffer *fb = drawable->driverPrivate;
1782 
1783    if (!rb || !buffer->bo)
1784       return;
1785 
1786    unsigned num_samples = rb->Base.Base.NumSamples;
1787 
1788    /* Check and see if we're already bound to the right
1789     * buffer object
1790     */
1791    struct intel_mipmap_tree *last_mt;
1792    if (num_samples == 0)
1793       last_mt = rb->mt;
1794    else
1795       last_mt = rb->singlesample_mt;
1796 
1797    if (last_mt && last_mt->bo == buffer->bo) {
1798       if (buffer_type == __DRI_IMAGE_BUFFER_SHARED) {
1799          intel_miptree_make_shareable(intel, last_mt);
1800       }
1801       return;
1802    }
1803 
1804    /* Only allow internal compression if samples == 0.  For multisampled
1805     * window system buffers, the only thing the single-sampled buffer is used
1806     * for is as a resolve target.  If we do any compression beyond what is
1807     * supported by the window system, we will just have to resolve so it's
1808     * probably better to just not bother.
1809     */
1810    const bool allow_internal_aux = (num_samples == 0);
1811 
1812    struct intel_mipmap_tree *mt =
1813       intel_miptree_create_for_dri_image(intel, buffer, GL_TEXTURE_2D,
1814                                          intel_rb_format(rb),
1815                                          allow_internal_aux);
1816    if (!mt)
1817       return;
1818 
1819    if (!intel_update_winsys_renderbuffer_miptree(intel, rb, mt,
1820                                                  buffer->width, buffer->height,
1821                                                  buffer->pitch)) {
1822       intel_miptree_release(&mt);
1823       return;
1824    }
1825 
1826    if (_mesa_is_front_buffer_drawing(fb) &&
1827        buffer_type == __DRI_IMAGE_BUFFER_FRONT &&
1828        rb->Base.Base.NumSamples > 1) {
1829       intel_renderbuffer_upsample(intel, rb);
1830    }
1831 
1832    if (buffer_type == __DRI_IMAGE_BUFFER_SHARED) {
1833       /* The compositor and the application may access this image
1834        * concurrently. The display hardware may even scanout the image while
1835        * the GPU is rendering to it.  Aux surfaces cause difficulty with
1836        * concurrent access, so permanently disable aux for this miptree.
1837        *
1838        * Perhaps we could improve overall application performance by
1839        * re-enabling the aux surface when EGL_RENDER_BUFFER transitions to
1840        * EGL_BACK_BUFFER, then disabling it again when EGL_RENDER_BUFFER
1841        * returns to EGL_SINGLE_BUFFER. I expect the wins and losses with this
1842        * approach to be highly dependent on the application's GL usage.
1843        *
1844        * I [chadv] expect clever disabling/reenabling to be counterproductive
1845        * in the use cases I care about: applications that render nearly
1846        * realtime handwriting to the surface while possibly undergiong
1847        * simultaneously scanout as a display plane. The app requires low
1848        * render latency. Even though the app spends most of its time in
1849        * shared-buffer mode, it also frequently transitions between
1850        * shared-buffer (EGL_SINGLE_BUFFER) and double-buffer (EGL_BACK_BUFFER)
1851        * mode.  Visual sutter during the transitions should be avoided.
1852        *
1853        * In this case, I [chadv] believe reducing the GPU workload at
1854        * shared-buffer/double-buffer transitions would offer a smoother app
1855        * experience than any savings due to aux compression. But I've
1856        * collected no data to prove my theory.
1857        */
1858       intel_miptree_make_shareable(intel, mt);
1859    }
1860 }
1861 
1862 static void
intel_update_image_buffers(struct brw_context * brw,__DRIdrawable * drawable)1863 intel_update_image_buffers(struct brw_context *brw, __DRIdrawable *drawable)
1864 {
1865    struct gl_framebuffer *fb = drawable->driverPrivate;
1866    __DRIscreen *dri_screen = brw->screen->driScrnPriv;
1867    struct intel_renderbuffer *front_rb;
1868    struct intel_renderbuffer *back_rb;
1869    struct __DRIimageList images;
1870    mesa_format format;
1871    uint32_t buffer_mask = 0;
1872    int ret;
1873 
1874    front_rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
1875    back_rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);
1876 
1877    if (back_rb)
1878       format = intel_rb_format(back_rb);
1879    else if (front_rb)
1880       format = intel_rb_format(front_rb);
1881    else
1882       return;
1883 
1884    if (front_rb && (_mesa_is_front_buffer_drawing(fb) ||
1885                     _mesa_is_front_buffer_reading(fb) || !back_rb)) {
1886       buffer_mask |= __DRI_IMAGE_BUFFER_FRONT;
1887    }
1888 
1889    if (back_rb)
1890       buffer_mask |= __DRI_IMAGE_BUFFER_BACK;
1891 
1892    ret = dri_screen->image.loader->getBuffers(drawable,
1893                                               driGLFormatToImageFormat(format),
1894                                               &drawable->dri2.stamp,
1895                                               drawable->loaderPrivate,
1896                                               buffer_mask,
1897                                               &images);
1898    if (!ret)
1899       return;
1900 
1901    if (images.image_mask & __DRI_IMAGE_BUFFER_FRONT) {
1902       drawable->w = images.front->width;
1903       drawable->h = images.front->height;
1904       intel_update_image_buffer(brw,
1905                                 drawable,
1906                                 front_rb,
1907                                 images.front,
1908                                 __DRI_IMAGE_BUFFER_FRONT);
1909    }
1910 
1911    if (images.image_mask & __DRI_IMAGE_BUFFER_BACK) {
1912       drawable->w = images.back->width;
1913       drawable->h = images.back->height;
1914       intel_update_image_buffer(brw,
1915                                 drawable,
1916                                 back_rb,
1917                                 images.back,
1918                                 __DRI_IMAGE_BUFFER_BACK);
1919    }
1920 
1921    if (images.image_mask & __DRI_IMAGE_BUFFER_SHARED) {
1922       assert(images.image_mask == __DRI_IMAGE_BUFFER_SHARED);
1923       drawable->w = images.back->width;
1924       drawable->h = images.back->height;
1925       intel_update_image_buffer(brw,
1926                                 drawable,
1927                                 back_rb,
1928                                 images.back,
1929                                 __DRI_IMAGE_BUFFER_SHARED);
1930       brw->is_shared_buffer_bound = true;
1931    } else {
1932       brw->is_shared_buffer_bound = false;
1933       brw->is_shared_buffer_dirty = false;
1934    }
1935 }
1936