1 /****************************************************************************
2  * Copyright (C) 2015 Intel Corporation.   All Rights Reserved.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  ***************************************************************************/
23 
24 #include "swr_screen.h"
25 #include "swr_context.h"
26 #include "swr_resource.h"
27 #include "swr_fence.h"
28 #include "swr_query.h"
29 #include "jit_api.h"
30 
31 #include "util/u_draw.h"
32 #include "util/u_prim.h"
33 
34 /*
35  * Draw vertex arrays, with optional indexing, optional instancing.
36  */
37 static void
swr_draw_vbo(struct pipe_context * pipe,const struct pipe_draw_info * info)38 swr_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
39 {
40    struct swr_context *ctx = swr_context(pipe);
41 
42    if (!info->count_from_stream_output && !info->indirect &&
43        !info->primitive_restart &&
44        !u_trim_pipe_prim(info->mode, (unsigned*)&info->count))
45       return;
46 
47    if (!swr_check_render_cond(pipe))
48       return;
49 
50    if (info->indirect) {
51       util_draw_indirect(pipe, info);
52       return;
53    }
54 
55    /* If indexed draw, force vertex validation since index buffer comes
56     * from draw info. */
57    if (info->index_size)
58       ctx->dirty |= SWR_NEW_VERTEX;
59 
60    /* Update derived state, pass draw info to update function. */
61    swr_update_derived(pipe, info);
62 
63    swr_update_draw_context(ctx);
64 
65    if (ctx->vs->pipe.stream_output.num_outputs) {
66       if (!ctx->vs->soFunc[info->mode]) {
67          STREAMOUT_COMPILE_STATE state = {0};
68          struct pipe_stream_output_info *so = &ctx->vs->pipe.stream_output;
69 
70          state.numVertsPerPrim = u_vertices_per_prim(info->mode);
71 
72          uint32_t offsets[MAX_SO_STREAMS] = {0};
73          uint32_t num = 0;
74 
75          for (uint32_t i = 0; i < so->num_outputs; i++) {
76             assert(so->output[i].stream == 0); // @todo
77             uint32_t output_buffer = so->output[i].output_buffer;
78             if (so->output[i].dst_offset != offsets[output_buffer]) {
79                // hole - need to fill
80                state.stream.decl[num].bufferIndex = output_buffer;
81                state.stream.decl[num].hole = true;
82                state.stream.decl[num].componentMask =
83                   (1 << (so->output[i].dst_offset - offsets[output_buffer]))
84                   - 1;
85                num++;
86                offsets[output_buffer] = so->output[i].dst_offset;
87             }
88 
89             unsigned attrib_slot = so->output[i].register_index;
90             attrib_slot = swr_so_adjust_attrib(attrib_slot, ctx->vs);
91 
92             state.stream.decl[num].bufferIndex = output_buffer;
93             state.stream.decl[num].attribSlot = attrib_slot;
94             state.stream.decl[num].componentMask =
95                ((1 << so->output[i].num_components) - 1)
96                << so->output[i].start_component;
97             state.stream.decl[num].hole = false;
98             num++;
99 
100             offsets[output_buffer] += so->output[i].num_components;
101          }
102 
103          state.stream.numDecls = num;
104 
105          HANDLE hJitMgr = swr_screen(pipe->screen)->hJitMgr;
106          ctx->vs->soFunc[info->mode] = JitCompileStreamout(hJitMgr, state);
107          debug_printf("so shader    %p\n", ctx->vs->soFunc[info->mode]);
108          assert(ctx->vs->soFunc[info->mode] && "Error: SoShader = NULL");
109       }
110 
111       ctx->api.pfnSwrSetSoFunc(ctx->swrContext, ctx->vs->soFunc[info->mode], 0);
112    }
113 
114    struct swr_vertex_element_state *velems = ctx->velems;
115    if (info->primitive_restart)
116       velems->fsState.cutIndex = info->restart_index;
117    else
118       velems->fsState.cutIndex = 0;
119    velems->fsState.bEnableCutIndex = info->primitive_restart;
120    velems->fsState.bPartialVertexBuffer = (info->min_index > 0);
121 
122    swr_jit_fetch_key key;
123    swr_generate_fetch_key(key, velems);
124    auto search = velems->map.find(key);
125    if (search != velems->map.end()) {
126       velems->fsFunc = search->second;
127    } else {
128       HANDLE hJitMgr = swr_screen(ctx->pipe.screen)->hJitMgr;
129       velems->fsFunc = JitCompileFetch(hJitMgr, velems->fsState);
130 
131       debug_printf("fetch shader %p\n", velems->fsFunc);
132       assert(velems->fsFunc && "Error: FetchShader = NULL");
133 
134       velems->map.insert(std::make_pair(key, velems->fsFunc));
135    }
136 
137    ctx->api.pfnSwrSetFetchFunc(ctx->swrContext, velems->fsFunc);
138 
139    /* Set up frontend state
140     * XXX setup provokingVertex & topologyProvokingVertex */
141    SWR_FRONTEND_STATE feState = {0};
142 
143    // feState.vsVertexSize seeds the PA size that is used as an interface
144    // between all the shader stages, so it has to be large enough to
145    // incorporate all interfaces between stages
146 
147    // max of gs and vs num_outputs
148    feState.vsVertexSize = ctx->vs->info.base.num_outputs;
149    if (ctx->gs &&
150        ctx->gs->info.base.num_outputs > feState.vsVertexSize) {
151       feState.vsVertexSize = ctx->gs->info.base.num_outputs;
152    }
153 
154    if (ctx->vs->info.base.num_outputs) {
155       // gs does not adjust for position in SGV slot at input from vs
156       if (!ctx->gs)
157          feState.vsVertexSize--;
158    }
159 
160    // other (non-SGV) slots start at VERTEX_ATTRIB_START_SLOT
161    feState.vsVertexSize += VERTEX_ATTRIB_START_SLOT;
162 
163    // The PA in the clipper does not handle BE vertex sizes
164    // different from FE. Increase vertexsize only for the cases that needed it
165 
166    // primid needs a slot
167    if (ctx->fs->info.base.uses_primid)
168       feState.vsVertexSize++;
169    // sprite coord enable
170    if (ctx->rasterizer->sprite_coord_enable)
171       feState.vsVertexSize++;
172 
173 
174    if (ctx->rasterizer->flatshade_first) {
175       feState.provokingVertex = {1, 0, 0};
176    } else {
177       feState.provokingVertex = {2, 1, 2};
178    }
179 
180    enum pipe_prim_type topology;
181    if (ctx->gs)
182       topology = (pipe_prim_type)ctx->gs->info.base.properties[TGSI_PROPERTY_GS_OUTPUT_PRIM];
183    else
184       topology = info->mode;
185 
186    switch (topology) {
187    case PIPE_PRIM_TRIANGLE_FAN:
188       feState.topologyProvokingVertex = feState.provokingVertex.triFan;
189       break;
190    case PIPE_PRIM_TRIANGLE_STRIP:
191    case PIPE_PRIM_TRIANGLES:
192       feState.topologyProvokingVertex = feState.provokingVertex.triStripList;
193       break;
194    case PIPE_PRIM_QUAD_STRIP:
195    case PIPE_PRIM_QUADS:
196       if (ctx->rasterizer->flatshade_first)
197          feState.topologyProvokingVertex = 0;
198       else
199          feState.topologyProvokingVertex = 3;
200       break;
201    case PIPE_PRIM_LINES:
202    case PIPE_PRIM_LINE_LOOP:
203    case PIPE_PRIM_LINE_STRIP:
204       feState.topologyProvokingVertex = feState.provokingVertex.lineStripList;
205       break;
206    default:
207       feState.topologyProvokingVertex = 0;
208    }
209 
210    feState.bEnableCutIndex = info->primitive_restart;
211    ctx->api.pfnSwrSetFrontendState(ctx->swrContext, &feState);
212 
213    if (info->index_size)
214       ctx->api.pfnSwrDrawIndexedInstanced(ctx->swrContext,
215                                           swr_convert_prim_topology(info->mode),
216                                           info->count,
217                                           info->instance_count,
218                                           info->start,
219                                           info->index_bias,
220                                           info->start_instance);
221    else
222       ctx->api.pfnSwrDrawInstanced(ctx->swrContext,
223                                    swr_convert_prim_topology(info->mode),
224                                    info->count,
225                                    info->instance_count,
226                                    info->start,
227                                    info->start_instance);
228 
229    /* On large client-buffer draw, we used client buffer directly, without
230     * copy.  Block until draw is finished.
231     * VMD is an example application that benefits from this. */
232    if (ctx->dirty & SWR_LARGE_CLIENT_DRAW) {
233       struct swr_screen *screen = swr_screen(pipe->screen);
234       swr_fence_submit(ctx, screen->flush_fence);
235       swr_fence_finish(pipe->screen, NULL, screen->flush_fence, 0);
236    }
237 }
238 
239 
240 static void
swr_flush(struct pipe_context * pipe,struct pipe_fence_handle ** fence,unsigned flags)241 swr_flush(struct pipe_context *pipe,
242           struct pipe_fence_handle **fence,
243           unsigned flags)
244 {
245    struct swr_context *ctx = swr_context(pipe);
246    struct swr_screen *screen = swr_screen(pipe->screen);
247 
248    for (int i=0; i < ctx->framebuffer.nr_cbufs; i++) {
249       struct pipe_surface *cb = ctx->framebuffer.cbufs[i];
250       if (cb) {
251          swr_store_dirty_resource(pipe, cb->texture, SWR_TILE_RESOLVED);
252       }
253    }
254    if (ctx->framebuffer.zsbuf) {
255       swr_store_dirty_resource(pipe, ctx->framebuffer.zsbuf->texture,
256                                SWR_TILE_RESOLVED);
257    }
258 
259    if (fence)
260       swr_fence_reference(pipe->screen, fence, screen->flush_fence);
261 }
262 
263 void
swr_finish(struct pipe_context * pipe)264 swr_finish(struct pipe_context *pipe)
265 {
266    struct pipe_fence_handle *fence = nullptr;
267 
268    swr_flush(pipe, &fence, 0);
269    swr_fence_finish(pipe->screen, NULL, fence, 0);
270    swr_fence_reference(pipe->screen, &fence, NULL);
271 }
272 
273 /*
274  * Invalidate tiles so they can be reloaded back when needed
275  */
276 void
swr_invalidate_render_target(struct pipe_context * pipe,uint32_t attachment,uint16_t width,uint16_t height)277 swr_invalidate_render_target(struct pipe_context *pipe,
278                              uint32_t attachment,
279                              uint16_t width, uint16_t height)
280 {
281    struct swr_context *ctx = swr_context(pipe);
282 
283    /* grab the rect from the passed in arguments */
284    swr_update_draw_context(ctx);
285    SWR_RECT full_rect =
286       {0, 0, (int32_t)width, (int32_t)height};
287    ctx->api.pfnSwrInvalidateTiles(ctx->swrContext,
288                                   1 << attachment,
289                                   full_rect);
290 }
291 
292 
293 /*
294  * Store SWR HotTiles back to renderTarget surface.
295  */
296 void
swr_store_render_target(struct pipe_context * pipe,uint32_t attachment,enum SWR_TILE_STATE post_tile_state)297 swr_store_render_target(struct pipe_context *pipe,
298                         uint32_t attachment,
299                         enum SWR_TILE_STATE post_tile_state)
300 {
301    struct swr_context *ctx = swr_context(pipe);
302    struct swr_draw_context *pDC = &ctx->swrDC;
303    struct SWR_SURFACE_STATE *renderTarget = &pDC->renderTargets[attachment];
304 
305    /* Only proceed if there's a valid surface to store to */
306    if (renderTarget->xpBaseAddress) {
307       swr_update_draw_context(ctx);
308       SWR_RECT full_rect =
309          {0, 0,
310           (int32_t)u_minify(renderTarget->width, renderTarget->lod),
311           (int32_t)u_minify(renderTarget->height, renderTarget->lod)};
312       ctx->api.pfnSwrStoreTiles(ctx->swrContext,
313                                 1 << attachment,
314                                 post_tile_state,
315                                 full_rect);
316    }
317 }
318 
319 void
swr_store_dirty_resource(struct pipe_context * pipe,struct pipe_resource * resource,enum SWR_TILE_STATE post_tile_state)320 swr_store_dirty_resource(struct pipe_context *pipe,
321                          struct pipe_resource *resource,
322                          enum SWR_TILE_STATE post_tile_state)
323 {
324    /* Only store resource if it has been written to */
325    if (swr_resource(resource)->status & SWR_RESOURCE_WRITE) {
326       struct swr_context *ctx = swr_context(pipe);
327       struct swr_screen *screen = swr_screen(pipe->screen);
328       struct swr_resource *spr = swr_resource(resource);
329 
330       swr_draw_context *pDC = &ctx->swrDC;
331       SWR_SURFACE_STATE *renderTargets = pDC->renderTargets;
332       for (uint32_t i = 0; i < SWR_NUM_ATTACHMENTS; i++)
333          if (renderTargets[i].xpBaseAddress == spr->swr.xpBaseAddress ||
334              (spr->secondary.xpBaseAddress &&
335               renderTargets[i].xpBaseAddress == spr->secondary.xpBaseAddress)) {
336             swr_store_render_target(pipe, i, post_tile_state);
337 
338             /* Mesa thinks depth/stencil are fused, so we'll never get an
339              * explicit resource for stencil.  So, if checking depth, then
340              * also check for stencil. */
341             if (spr->has_stencil && (i == SWR_ATTACHMENT_DEPTH)) {
342                swr_store_render_target(
343                   pipe, SWR_ATTACHMENT_STENCIL, post_tile_state);
344             }
345 
346             /* This fence signals StoreTiles completion */
347             swr_fence_submit(ctx, screen->flush_fence);
348 
349             break;
350          }
351    }
352 }
353 
354 void
swr_draw_init(struct pipe_context * pipe)355 swr_draw_init(struct pipe_context *pipe)
356 {
357    pipe->draw_vbo = swr_draw_vbo;
358    pipe->flush = swr_flush;
359 }
360