1 /****************************************************************************
2  * Copyright (C) 2015 Intel Corporation.   All Rights Reserved.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  ***************************************************************************/
23 
24 #include "swr_screen.h"
25 #include "swr_context.h"
26 #include "swr_resource.h"
27 #include "swr_fence.h"
28 #include "swr_query.h"
29 #include "jit_api.h"
30 
31 #include "util/u_draw.h"
32 #include "util/u_prim.h"
33 
34 #include <algorithm>
35 #include <iostream>
36 /*
37  * Draw vertex arrays, with optional indexing, optional instancing.
38  */
39 static void
swr_draw_vbo(struct pipe_context * pipe,const struct pipe_draw_info * info)40 swr_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
41 {
42    struct swr_context *ctx = swr_context(pipe);
43 
44    if (!info->count_from_stream_output && !info->indirect &&
45        !info->primitive_restart &&
46        !u_trim_pipe_prim(info->mode, (unsigned*)&info->count))
47       return;
48 
49    if (!swr_check_render_cond(pipe))
50       return;
51 
52    if (info->indirect) {
53       util_draw_indirect(pipe, info);
54       return;
55    }
56 
57    /* If indexed draw, force vertex validation since index buffer comes
58     * from draw info. */
59    if (info->index_size)
60       ctx->dirty |= SWR_NEW_VERTEX;
61 
62    /* Update derived state, pass draw info to update function. */
63    swr_update_derived(pipe, info);
64 
65    swr_update_draw_context(ctx);
66 
67    struct pipe_draw_info resolved_info;
68    /* DrawTransformFeedback */
69    if (info->count_from_stream_output) {
70       // trick copied from softpipe to modify const struct *info
71       memcpy(&resolved_info, (void*)info, sizeof(struct pipe_draw_info));
72       resolved_info.count = ctx->so_primCounter * resolved_info.vertices_per_patch;
73       resolved_info.max_index = resolved_info.count - 1;
74       info = &resolved_info;
75    }
76 
77    if (ctx->vs->pipe.stream_output.num_outputs) {
78       if (!ctx->vs->soFunc[info->mode]) {
79          STREAMOUT_COMPILE_STATE state = {0};
80          struct pipe_stream_output_info *so = &ctx->vs->pipe.stream_output;
81 
82          state.numVertsPerPrim = u_vertices_per_prim(info->mode);
83 
84          uint32_t offsets[MAX_SO_STREAMS] = {0};
85          uint32_t num = 0;
86 
87          for (uint32_t i = 0; i < so->num_outputs; i++) {
88             assert(so->output[i].stream == 0); // @todo
89             uint32_t output_buffer = so->output[i].output_buffer;
90             if (so->output[i].dst_offset != offsets[output_buffer]) {
91                // hole - need to fill
92                state.stream.decl[num].bufferIndex = output_buffer;
93                state.stream.decl[num].hole = true;
94                state.stream.decl[num].componentMask =
95                   (1 << (so->output[i].dst_offset - offsets[output_buffer]))
96                   - 1;
97                num++;
98                offsets[output_buffer] = so->output[i].dst_offset;
99             }
100 
101             unsigned attrib_slot = so->output[i].register_index;
102             attrib_slot = swr_so_adjust_attrib(attrib_slot, ctx->vs);
103 
104             state.stream.decl[num].bufferIndex = output_buffer;
105             state.stream.decl[num].attribSlot = attrib_slot;
106             state.stream.decl[num].componentMask =
107                ((1 << so->output[i].num_components) - 1)
108                << so->output[i].start_component;
109             state.stream.decl[num].hole = false;
110             num++;
111 
112             offsets[output_buffer] += so->output[i].num_components;
113          }
114 
115          state.stream.numDecls = num;
116 
117          HANDLE hJitMgr = swr_screen(pipe->screen)->hJitMgr;
118          ctx->vs->soFunc[info->mode] = JitCompileStreamout(hJitMgr, state);
119          debug_printf("so shader    %p\n", ctx->vs->soFunc[info->mode]);
120          assert(ctx->vs->soFunc[info->mode] && "Error: SoShader = NULL");
121       }
122 
123       ctx->api.pfnSwrSetSoFunc(ctx->swrContext, ctx->vs->soFunc[info->mode], 0);
124    }
125 
126    struct swr_vertex_element_state *velems = ctx->velems;
127    if (info->primitive_restart)
128       velems->fsState.cutIndex = info->restart_index;
129    else
130       velems->fsState.cutIndex = 0;
131    velems->fsState.bEnableCutIndex = info->primitive_restart;
132    velems->fsState.bPartialVertexBuffer = (info->min_index > 0);
133 
134    swr_jit_fetch_key key;
135    swr_generate_fetch_key(key, velems);
136    auto search = velems->map.find(key);
137    if (search != velems->map.end()) {
138       velems->fsFunc = search->second;
139    } else {
140       HANDLE hJitMgr = swr_screen(ctx->pipe.screen)->hJitMgr;
141       velems->fsFunc = JitCompileFetch(hJitMgr, velems->fsState);
142 
143       debug_printf("fetch shader %p\n", velems->fsFunc);
144       assert(velems->fsFunc && "Error: FetchShader = NULL");
145 
146       velems->map.insert(std::make_pair(key, velems->fsFunc));
147    }
148 
149    ctx->api.pfnSwrSetFetchFunc(ctx->swrContext, velems->fsFunc);
150 
151    /* Set up frontend state
152     * XXX setup provokingVertex & topologyProvokingVertex */
153    SWR_FRONTEND_STATE feState = {0};
154 
155    // feState.vsVertexSize seeds the PA size that is used as an interface
156    // between all the shader stages, so it has to be large enough to
157    // incorporate all interfaces between stages
158 
159    // max of frontend shaders num_outputs
160    feState.vsVertexSize = ctx->vs->info.base.num_outputs;
161    if (ctx->gs) {
162       feState.vsVertexSize = std::max(feState.vsVertexSize, (uint32_t)ctx->gs->info.base.num_outputs);
163    }
164    if (ctx->tcs) {
165       feState.vsVertexSize = std::max(feState.vsVertexSize, (uint32_t)ctx->tcs->info.base.num_outputs);
166    }
167    if (ctx->tes) {
168       feState.vsVertexSize = std::max(feState.vsVertexSize, (uint32_t)ctx->tes->info.base.num_outputs);
169    }
170 
171 
172    if (ctx->vs->info.base.num_outputs) {
173       // gs does not adjust for position in SGV slot at input from vs
174       if (!ctx->gs && !ctx->tcs && !ctx->tes)
175          feState.vsVertexSize--;
176    }
177 
178    // other (non-SGV) slots start at VERTEX_ATTRIB_START_SLOT
179    feState.vsVertexSize += VERTEX_ATTRIB_START_SLOT;
180 
181    // The PA in the clipper does not handle BE vertex sizes
182    // different from FE. Increase vertexsize only for the cases that needed it
183 
184    // primid needs a slot
185    if (ctx->fs->info.base.uses_primid)
186       feState.vsVertexSize++;
187    // sprite coord enable
188    if (ctx->rasterizer->sprite_coord_enable)
189       feState.vsVertexSize++;
190 
191    if (ctx->rasterizer->flatshade_first) {
192       feState.provokingVertex = {1, 0, 0};
193    } else {
194       feState.provokingVertex = {2, 1, 2};
195    }
196 
197    enum pipe_prim_type topology;
198    if (ctx->gs)
199       topology = (pipe_prim_type)ctx->gs->info.base.properties[TGSI_PROPERTY_GS_OUTPUT_PRIM];
200    else
201       topology = info->mode;
202 
203    switch (topology) {
204    case PIPE_PRIM_TRIANGLE_FAN:
205       feState.topologyProvokingVertex = feState.provokingVertex.triFan;
206       break;
207    case PIPE_PRIM_TRIANGLE_STRIP:
208    case PIPE_PRIM_TRIANGLES:
209       feState.topologyProvokingVertex = feState.provokingVertex.triStripList;
210       break;
211    case PIPE_PRIM_QUAD_STRIP:
212    case PIPE_PRIM_QUADS:
213       if (ctx->rasterizer->flatshade_first)
214          feState.topologyProvokingVertex = 0;
215       else
216          feState.topologyProvokingVertex = 3;
217       break;
218    case PIPE_PRIM_LINES:
219    case PIPE_PRIM_LINE_LOOP:
220    case PIPE_PRIM_LINE_STRIP:
221       feState.topologyProvokingVertex = feState.provokingVertex.lineStripList;
222       break;
223    default:
224       feState.topologyProvokingVertex = 0;
225    }
226 
227    feState.bEnableCutIndex = info->primitive_restart;
228    ctx->api.pfnSwrSetFrontendState(ctx->swrContext, &feState);
229 
230    if (info->index_size)
231       ctx->api.pfnSwrDrawIndexedInstanced(ctx->swrContext,
232                                           swr_convert_prim_topology(info->mode, info->vertices_per_patch),
233                                           info->count,
234                                           info->instance_count,
235                                           info->start,
236                                           info->index_bias,
237                                           info->start_instance);
238    else
239       ctx->api.pfnSwrDrawInstanced(ctx->swrContext,
240                                    swr_convert_prim_topology(info->mode, info->vertices_per_patch),
241                                    info->count,
242                                    info->instance_count,
243                                    info->start,
244                                    info->start_instance);
245 
246    /* On client-buffer draw, we used client buffer directly, without
247     * copy.  Block until draw is finished.
248     * VMD is an example application that benefits from this. */
249    if (ctx->dirty & SWR_BLOCK_CLIENT_DRAW) {
250       struct swr_screen *screen = swr_screen(pipe->screen);
251       swr_fence_submit(ctx, screen->flush_fence);
252       swr_fence_finish(pipe->screen, NULL, screen->flush_fence, 0);
253    }
254 }
255 
256 
257 static void
swr_flush(struct pipe_context * pipe,struct pipe_fence_handle ** fence,unsigned flags)258 swr_flush(struct pipe_context *pipe,
259           struct pipe_fence_handle **fence,
260           unsigned flags)
261 {
262    struct swr_context *ctx = swr_context(pipe);
263    struct swr_screen *screen = swr_screen(pipe->screen);
264 
265    for (int i=0; i < ctx->framebuffer.nr_cbufs; i++) {
266       struct pipe_surface *cb = ctx->framebuffer.cbufs[i];
267       if (cb) {
268          swr_store_dirty_resource(pipe, cb->texture, SWR_TILE_RESOLVED);
269       }
270    }
271    if (ctx->framebuffer.zsbuf) {
272       swr_store_dirty_resource(pipe, ctx->framebuffer.zsbuf->texture,
273                                SWR_TILE_RESOLVED);
274    }
275 
276    if (fence)
277       swr_fence_reference(pipe->screen, fence, screen->flush_fence);
278 }
279 
280 void
swr_finish(struct pipe_context * pipe)281 swr_finish(struct pipe_context *pipe)
282 {
283    struct pipe_fence_handle *fence = nullptr;
284 
285    swr_flush(pipe, &fence, 0);
286    swr_fence_finish(pipe->screen, NULL, fence, 0);
287    swr_fence_reference(pipe->screen, &fence, NULL);
288 }
289 
290 /*
291  * Invalidate tiles so they can be reloaded back when needed
292  */
293 void
swr_invalidate_render_target(struct pipe_context * pipe,uint32_t attachment,uint16_t width,uint16_t height)294 swr_invalidate_render_target(struct pipe_context *pipe,
295                              uint32_t attachment,
296                              uint16_t width, uint16_t height)
297 {
298    struct swr_context *ctx = swr_context(pipe);
299 
300    /* grab the rect from the passed in arguments */
301    swr_update_draw_context(ctx);
302    SWR_RECT full_rect =
303       {0, 0, (int32_t)width, (int32_t)height};
304    ctx->api.pfnSwrInvalidateTiles(ctx->swrContext,
305                                   1 << attachment,
306                                   full_rect);
307 }
308 
309 
310 /*
311  * Store SWR HotTiles back to renderTarget surface.
312  */
313 void
swr_store_render_target(struct pipe_context * pipe,uint32_t attachment,enum SWR_TILE_STATE post_tile_state)314 swr_store_render_target(struct pipe_context *pipe,
315                         uint32_t attachment,
316                         enum SWR_TILE_STATE post_tile_state)
317 {
318    struct swr_context *ctx = swr_context(pipe);
319    struct swr_draw_context *pDC = &ctx->swrDC;
320    struct SWR_SURFACE_STATE *renderTarget = &pDC->renderTargets[attachment];
321 
322    /* Only proceed if there's a valid surface to store to */
323    if (renderTarget->xpBaseAddress) {
324       swr_update_draw_context(ctx);
325       SWR_RECT full_rect =
326          {0, 0,
327           (int32_t)u_minify(renderTarget->width, renderTarget->lod),
328           (int32_t)u_minify(renderTarget->height, renderTarget->lod)};
329       ctx->api.pfnSwrStoreTiles(ctx->swrContext,
330                                 1 << attachment,
331                                 post_tile_state,
332                                 full_rect);
333    }
334 }
335 
336 void
swr_store_dirty_resource(struct pipe_context * pipe,struct pipe_resource * resource,enum SWR_TILE_STATE post_tile_state)337 swr_store_dirty_resource(struct pipe_context *pipe,
338                          struct pipe_resource *resource,
339                          enum SWR_TILE_STATE post_tile_state)
340 {
341    /* Only store resource if it has been written to */
342    if (swr_resource(resource)->status & SWR_RESOURCE_WRITE) {
343       struct swr_context *ctx = swr_context(pipe);
344       struct swr_screen *screen = swr_screen(pipe->screen);
345       struct swr_resource *spr = swr_resource(resource);
346 
347       swr_draw_context *pDC = &ctx->swrDC;
348       SWR_SURFACE_STATE *renderTargets = pDC->renderTargets;
349       for (uint32_t i = 0; i < SWR_NUM_ATTACHMENTS; i++)
350          if (renderTargets[i].xpBaseAddress == spr->swr.xpBaseAddress ||
351              (spr->secondary.xpBaseAddress &&
352               renderTargets[i].xpBaseAddress == spr->secondary.xpBaseAddress)) {
353             swr_store_render_target(pipe, i, post_tile_state);
354 
355             /* Mesa thinks depth/stencil are fused, so we'll never get an
356              * explicit resource for stencil.  So, if checking depth, then
357              * also check for stencil. */
358             if (spr->has_stencil && (i == SWR_ATTACHMENT_DEPTH)) {
359                swr_store_render_target(
360                   pipe, SWR_ATTACHMENT_STENCIL, post_tile_state);
361             }
362 
363             /* This fence signals StoreTiles completion */
364             swr_fence_submit(ctx, screen->flush_fence);
365 
366             break;
367          }
368    }
369 }
370 
371 void
swr_draw_init(struct pipe_context * pipe)372 swr_draw_init(struct pipe_context *pipe)
373 {
374    pipe->draw_vbo = swr_draw_vbo;
375    pipe->flush = swr_flush;
376 }
377