1 /**************************************************************************
2  *
3  * Copyright 2007 VMware, Inc.
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the
8  * "Software"), to deal in the Software without restriction, including
9  * without limitation the rights to use, copy, modify, merge, publish,
10  * distribute, sub license, and/or sell copies of the Software, and to
11  * permit persons to whom the Software is furnished to do so, subject to
12  * the following conditions:
13  *
14  * The above copyright notice and this permission notice (including the
15  * next paragraph) shall be included in all copies or substantial portions
16  * of the Software.
17  *
18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25  *
26  **************************************************************************/
27 
28 #include "main/bufferobj.h"
29 #include "main/image.h"
30 #include "main/pbo.h"
31 #include "main/imports.h"
32 #include "main/readpix.h"
33 #include "main/enums.h"
34 #include "main/framebuffer.h"
35 #include "util/u_inlines.h"
36 #include "util/u_format.h"
37 #include "cso_cache/cso_context.h"
38 
39 #include "st_cb_fbo.h"
40 #include "st_atom.h"
41 #include "st_context.h"
42 #include "st_cb_bitmap.h"
43 #include "st_cb_readpixels.h"
44 #include "st_debug.h"
45 #include "state_tracker/st_cb_texture.h"
46 #include "state_tracker/st_format.h"
47 #include "state_tracker/st_pbo.h"
48 #include "state_tracker/st_texture.h"
49 
/* The readpixels cache caches a blitted staging texture so that back-to-back
 * calls to glReadPixels with user pointers require less CPU-GPU synchronization.
 *
 * Assumptions:
 *
 * (1) Blits have high synchronization overheads, and it is beneficial to
 *     use a single blit of the entire framebuffer instead of many smaller
 *     blits (because the smaller blits cannot be batched, and we have to wait
 *     for the GPU after each one).
 *
 * (2) transfer_map implicitly involves a blit as well (for de-tiling, copy
 *     from VRAM, etc.), so that it is beneficial to replace the
 *     _mesa_readpixels path as well when possible.
 *
 * Change this #define to true to fill and use the cache whenever possible
 * (this is inefficient and only meant for testing / debugging).
 *
 * The macro is consulted by try_cached_readpixels(): when it is true, the
 * hit-count heuristic there is bypassed and the cache is filled as soon as
 * it is found empty.
 */
#define ALWAYS_READPIXELS_CACHE false
68 
69 static boolean
needs_integer_signed_unsigned_conversion(const struct gl_context * ctx,GLenum format,GLenum type)70 needs_integer_signed_unsigned_conversion(const struct gl_context *ctx,
71                                          GLenum format, GLenum type)
72 {
73    struct gl_renderbuffer *rb =
74       _mesa_get_read_renderbuffer_for_format(ctx, format);
75 
76    assert(rb);
77 
78    GLenum srcType = _mesa_get_format_datatype(rb->Format);
79 
80     if ((srcType == GL_INT &&
81         (type == GL_UNSIGNED_INT ||
82          type == GL_UNSIGNED_SHORT ||
83          type == GL_UNSIGNED_BYTE)) ||
84        (srcType == GL_UNSIGNED_INT &&
85         (type == GL_INT ||
86          type == GL_SHORT ||
87          type == GL_BYTE))) {
88       return TRUE;
89    }
90 
91    return FALSE;
92 }
93 
94 static bool
try_pbo_readpixels(struct st_context * st,struct st_renderbuffer * strb,bool invert_y,GLint x,GLint y,GLsizei width,GLsizei height,enum pipe_format src_format,enum pipe_format dst_format,const struct gl_pixelstore_attrib * pack,void * pixels)95 try_pbo_readpixels(struct st_context *st, struct st_renderbuffer *strb,
96                    bool invert_y,
97                    GLint x, GLint y, GLsizei width, GLsizei height,
98                    enum pipe_format src_format, enum pipe_format dst_format,
99                    const struct gl_pixelstore_attrib *pack, void *pixels)
100 {
101    struct pipe_context *pipe = st->pipe;
102    struct pipe_screen *screen = pipe->screen;
103    struct cso_context *cso = st->cso_context;
104    struct pipe_surface *surface = strb->surface;
105    struct pipe_resource *texture = strb->texture;
106    const struct util_format_description *desc;
107    struct st_pbo_addresses addr;
108    struct pipe_framebuffer_state fb;
109    enum pipe_texture_target view_target;
110    bool success = false;
111 
112    if (texture->nr_samples > 1)
113       return false;
114 
115    if (!screen->is_format_supported(screen, dst_format, PIPE_BUFFER, 0,
116                                     PIPE_BIND_SHADER_IMAGE))
117       return false;
118 
119    desc = util_format_description(dst_format);
120 
121    /* Compute PBO addresses */
122    addr.bytes_per_pixel = desc->block.bits / 8;
123    addr.xoffset = x;
124    addr.yoffset = y;
125    addr.width = width;
126    addr.height = height;
127    addr.depth = 1;
128    if (!st_pbo_addresses_pixelstore(st, GL_TEXTURE_2D, false, pack, pixels, &addr))
129       return false;
130 
131    cso_save_state(cso, (CSO_BIT_FRAGMENT_SAMPLER_VIEWS |
132                         CSO_BIT_FRAGMENT_SAMPLERS |
133                         CSO_BIT_FRAGMENT_IMAGE0 |
134                         CSO_BIT_BLEND |
135                         CSO_BIT_VERTEX_ELEMENTS |
136                         CSO_BIT_AUX_VERTEX_BUFFER_SLOT |
137                         CSO_BIT_FRAMEBUFFER |
138                         CSO_BIT_VIEWPORT |
139                         CSO_BIT_RASTERIZER |
140                         CSO_BIT_DEPTH_STENCIL_ALPHA |
141                         CSO_BIT_STREAM_OUTPUTS |
142                         CSO_BIT_PAUSE_QUERIES |
143                         CSO_BIT_SAMPLE_MASK |
144                         CSO_BIT_MIN_SAMPLES |
145                         CSO_BIT_RENDER_CONDITION |
146                         CSO_BITS_ALL_SHADERS));
147    cso_save_constant_buffer_slot0(cso, PIPE_SHADER_FRAGMENT);
148 
149    cso_set_sample_mask(cso, ~0);
150    cso_set_min_samples(cso, 1);
151    cso_set_render_condition(cso, NULL, FALSE, 0);
152 
153    /* Set up the sampler_view */
154    {
155       struct pipe_sampler_view templ;
156       struct pipe_sampler_view *sampler_view;
157       struct pipe_sampler_state sampler = {0};
158       const struct pipe_sampler_state *samplers[1] = {&sampler};
159 
160       u_sampler_view_default_template(&templ, texture, src_format);
161 
162       switch (texture->target) {
163       case PIPE_TEXTURE_CUBE:
164       case PIPE_TEXTURE_CUBE_ARRAY:
165          view_target = PIPE_TEXTURE_2D_ARRAY;
166          break;
167       default:
168          view_target = texture->target;
169          break;
170       }
171 
172       templ.target = view_target;
173       templ.u.tex.first_level = surface->u.tex.level;
174       templ.u.tex.last_level = templ.u.tex.first_level;
175 
176       if (view_target != PIPE_TEXTURE_3D) {
177          templ.u.tex.first_layer = surface->u.tex.first_layer;
178          templ.u.tex.last_layer = templ.u.tex.first_layer;
179       } else {
180          addr.constants.layer_offset = surface->u.tex.first_layer;
181       }
182 
183       sampler_view = pipe->create_sampler_view(pipe, texture, &templ);
184       if (sampler_view == NULL)
185          goto fail;
186 
187       cso_set_sampler_views(cso, PIPE_SHADER_FRAGMENT, 1, &sampler_view);
188 
189       pipe_sampler_view_reference(&sampler_view, NULL);
190 
191       cso_set_samplers(cso, PIPE_SHADER_FRAGMENT, 1, samplers);
192    }
193 
194    /* Set up destination image */
195    {
196       struct pipe_image_view image;
197 
198       memset(&image, 0, sizeof(image));
199       image.resource = addr.buffer;
200       image.format = dst_format;
201       image.access = PIPE_IMAGE_ACCESS_WRITE;
202       image.u.buf.offset = addr.first_element * addr.bytes_per_pixel;
203       image.u.buf.size = (addr.last_element - addr.first_element + 1) *
204                          addr.bytes_per_pixel;
205 
206       cso_set_shader_images(cso, PIPE_SHADER_FRAGMENT, 0, 1, &image);
207    }
208 
209    /* Set up no-attachment framebuffer */
210    memset(&fb, 0, sizeof(fb));
211    fb.width = surface->width;
212    fb.height = surface->height;
213    fb.samples = 1;
214    fb.layers = 1;
215    cso_set_framebuffer(cso, &fb);
216 
217    /* Any blend state would do. Set this just to prevent drivers having
218     * blend == NULL.
219     */
220    cso_set_blend(cso, &st->pbo.upload_blend);
221 
222    cso_set_viewport_dims(cso, fb.width, fb.height, invert_y);
223 
224    if (invert_y)
225       st_pbo_addresses_invert_y(&addr, fb.height);
226 
227    {
228       struct pipe_depth_stencil_alpha_state dsa;
229       memset(&dsa, 0, sizeof(dsa));
230       cso_set_depth_stencil_alpha(cso, &dsa);
231    }
232 
233    /* Set up the fragment shader */
234    {
235       void *fs = st_pbo_get_download_fs(st, view_target, src_format, dst_format);
236       if (!fs)
237          goto fail;
238 
239       cso_set_fragment_shader_handle(cso, fs);
240    }
241 
242    success = st_pbo_draw(st, &addr, fb.width, fb.height);
243 
244    /* Buffer written via shader images needs explicit synchronization. */
245    pipe->memory_barrier(pipe, PIPE_BARRIER_ALL);
246 
247 fail:
248    cso_restore_state(cso);
249    cso_restore_constant_buffer_slot0(cso, PIPE_SHADER_FRAGMENT);
250 
251    return success;
252 }
253 
254 /**
255  * Create a staging texture and blit the requested region to it.
256  */
257 static struct pipe_resource *
blit_to_staging(struct st_context * st,struct st_renderbuffer * strb,bool invert_y,GLint x,GLint y,GLsizei width,GLsizei height,GLenum format,enum pipe_format src_format,enum pipe_format dst_format)258 blit_to_staging(struct st_context *st, struct st_renderbuffer *strb,
259                    bool invert_y,
260                    GLint x, GLint y, GLsizei width, GLsizei height,
261                    GLenum format,
262                    enum pipe_format src_format, enum pipe_format dst_format)
263 {
264    struct pipe_context *pipe = st->pipe;
265    struct pipe_screen *screen = pipe->screen;
266    struct pipe_resource dst_templ;
267    struct pipe_resource *dst;
268    struct pipe_blit_info blit;
269 
270    /* We are creating a texture of the size of the region being read back.
271     * Need to check for NPOT texture support. */
272    if (!screen->get_param(screen, PIPE_CAP_NPOT_TEXTURES) &&
273        (!util_is_power_of_two(width) ||
274         !util_is_power_of_two(height)))
275       return NULL;
276 
277    /* create the destination texture */
278    memset(&dst_templ, 0, sizeof(dst_templ));
279    dst_templ.target = PIPE_TEXTURE_2D;
280    dst_templ.format = dst_format;
281    if (util_format_is_depth_or_stencil(dst_format))
282       dst_templ.bind |= PIPE_BIND_DEPTH_STENCIL;
283    else
284       dst_templ.bind |= PIPE_BIND_RENDER_TARGET;
285    dst_templ.usage = PIPE_USAGE_STAGING;
286 
287    st_gl_texture_dims_to_pipe_dims(GL_TEXTURE_2D, width, height, 1,
288                                    &dst_templ.width0, &dst_templ.height0,
289                                    &dst_templ.depth0, &dst_templ.array_size);
290 
291    dst = screen->resource_create(screen, &dst_templ);
292    if (!dst)
293       return NULL;
294 
295    memset(&blit, 0, sizeof(blit));
296    blit.src.resource = strb->texture;
297    blit.src.level = strb->surface->u.tex.level;
298    blit.src.format = src_format;
299    blit.dst.resource = dst;
300    blit.dst.level = 0;
301    blit.dst.format = dst->format;
302    blit.src.box.x = x;
303    blit.dst.box.x = 0;
304    blit.src.box.y = y;
305    blit.dst.box.y = 0;
306    blit.src.box.z = strb->surface->u.tex.first_layer;
307    blit.dst.box.z = 0;
308    blit.src.box.width = blit.dst.box.width = width;
309    blit.src.box.height = blit.dst.box.height = height;
310    blit.src.box.depth = blit.dst.box.depth = 1;
311    blit.mask = st_get_blit_mask(strb->Base._BaseFormat, format);
312    blit.filter = PIPE_TEX_FILTER_NEAREST;
313    blit.scissor_enable = FALSE;
314 
315    if (invert_y) {
316       blit.src.box.y = strb->Base.Height - blit.src.box.y;
317       blit.src.box.height = -blit.src.box.height;
318    }
319 
320    /* blit */
321    st->pipe->blit(st->pipe, &blit);
322 
323    return dst;
324 }
325 
326 static struct pipe_resource *
try_cached_readpixels(struct st_context * st,struct st_renderbuffer * strb,bool invert_y,GLsizei width,GLsizei height,GLenum format,enum pipe_format src_format,enum pipe_format dst_format)327 try_cached_readpixels(struct st_context *st, struct st_renderbuffer *strb,
328                       bool invert_y,
329                       GLsizei width, GLsizei height,
330                       GLenum format,
331                       enum pipe_format src_format, enum pipe_format dst_format)
332 {
333    struct pipe_resource *src = strb->texture;
334    struct pipe_resource *dst = NULL;
335 
336    if (ST_DEBUG & DEBUG_NOREADPIXCACHE)
337       return NULL;
338 
339    /* Reset cache after invalidation or switch of parameters. */
340    if (st->readpix_cache.src != src ||
341        st->readpix_cache.dst_format != dst_format ||
342        st->readpix_cache.level != strb->surface->u.tex.level ||
343        st->readpix_cache.layer != strb->surface->u.tex.first_layer) {
344       pipe_resource_reference(&st->readpix_cache.src, src);
345       pipe_resource_reference(&st->readpix_cache.cache, NULL);
346       st->readpix_cache.dst_format = dst_format;
347       st->readpix_cache.level = strb->surface->u.tex.level;
348       st->readpix_cache.layer = strb->surface->u.tex.first_layer;
349       st->readpix_cache.hits = 0;
350    }
351 
352    /* Decide whether to trigger the cache. */
353    if (!st->readpix_cache.cache) {
354       if (!strb->use_readpix_cache && !ALWAYS_READPIXELS_CACHE) {
355          /* Heuristic: If previous successive calls read at least a fraction
356           * of the surface _and_ we read again, trigger the cache.
357           */
358          unsigned threshold = MAX2(1, strb->Base.Width * strb->Base.Height / 8);
359 
360          if (st->readpix_cache.hits < threshold) {
361             st->readpix_cache.hits += width * height;
362             return NULL;
363          }
364 
365          strb->use_readpix_cache = true;
366       }
367 
368       /* Fill the cache */
369       st->readpix_cache.cache = blit_to_staging(st, strb, invert_y,
370                                                 0, 0,
371                                                 strb->Base.Width,
372                                                 strb->Base.Height, format,
373                                                 src_format, dst_format);
374    }
375 
376    /* Return an owning reference to stay consistent with the non-cached path */
377    pipe_resource_reference(&dst, st->readpix_cache.cache);
378 
379    return dst;
380 }
381 
382 /**
383  * This uses a blit to copy the read buffer to a texture format which matches
384  * the format and type combo and then a fast read-back is done using memcpy.
385  * We can do arbitrary X/Y/Z/W/0/1 swizzling here as long as there is
386  * a format which matches the swizzling.
387  *
388  * If such a format isn't available, we fall back to _mesa_readpixels.
389  *
390  * NOTE: Some drivers use a blit to convert between tiled and linear
391  *       texture layouts during texture uploads/downloads, so the blit
392  *       we do here should be free in such cases.
393  */
394 static void
st_ReadPixels(struct gl_context * ctx,GLint x,GLint y,GLsizei width,GLsizei height,GLenum format,GLenum type,const struct gl_pixelstore_attrib * pack,void * pixels)395 st_ReadPixels(struct gl_context *ctx, GLint x, GLint y,
396               GLsizei width, GLsizei height,
397               GLenum format, GLenum type,
398               const struct gl_pixelstore_attrib *pack,
399               void *pixels)
400 {
401    struct st_context *st = st_context(ctx);
402    struct gl_renderbuffer *rb =
403          _mesa_get_read_renderbuffer_for_format(ctx, format);
404    struct st_renderbuffer *strb = st_renderbuffer(rb);
405    struct pipe_context *pipe = st->pipe;
406    struct pipe_screen *screen = pipe->screen;
407    struct pipe_resource *src;
408    struct pipe_resource *dst = NULL;
409    enum pipe_format dst_format, src_format;
410    unsigned bind;
411    struct pipe_transfer *tex_xfer;
412    ubyte *map = NULL;
413    int dst_x, dst_y;
414 
415    /* Validate state (to be sure we have up-to-date framebuffer surfaces)
416     * and flush the bitmap cache prior to reading. */
417    st_validate_state(st, ST_PIPELINE_UPDATE_FRAMEBUFFER);
418    st_flush_bitmap_cache(st);
419 
420    if (!st->prefer_blit_based_texture_transfer) {
421       goto fallback;
422    }
423 
424    /* This must be done after state validation. */
425    src = strb->texture;
426 
427    /* XXX Fallback for depth-stencil formats due to an incomplete
428     * stencil blit implementation in some drivers. */
429    if (format == GL_DEPTH_STENCIL) {
430       goto fallback;
431    }
432 
433    /* If the base internal format and the texture format don't match, we have
434     * to use the slow path. */
435    if (rb->_BaseFormat !=
436        _mesa_get_format_base_format(rb->Format)) {
437       goto fallback;
438    }
439 
440    if (_mesa_readpixels_needs_slow_path(ctx, format, type, GL_TRUE)) {
441       goto fallback;
442    }
443 
444    /* Convert the source format to what is expected by ReadPixels
445     * and see if it's supported. */
446    src_format = util_format_linear(src->format);
447    src_format = util_format_luminance_to_red(src_format);
448    src_format = util_format_intensity_to_red(src_format);
449 
450    if (!src_format ||
451        !screen->is_format_supported(screen, src_format, src->target,
452                                     src->nr_samples,
453                                     PIPE_BIND_SAMPLER_VIEW)) {
454       goto fallback;
455    }
456 
457    if (format == GL_DEPTH_COMPONENT || format == GL_DEPTH_STENCIL)
458       bind = PIPE_BIND_DEPTH_STENCIL;
459    else
460       bind = PIPE_BIND_RENDER_TARGET;
461 
462    /* Choose the destination format by finding the best match
463     * for the format+type combo. */
464    dst_format = st_choose_matching_format(st, bind, format, type,
465                                           pack->SwapBytes);
466    if (dst_format == PIPE_FORMAT_NONE) {
467       goto fallback;
468    }
469 
470    if (st->pbo.download_enabled && _mesa_is_bufferobj(pack->BufferObj)) {
471       if (try_pbo_readpixels(st, strb,
472                              st_fb_orientation(ctx->ReadBuffer) == Y_0_TOP,
473                              x, y, width, height,
474                              src_format, dst_format,
475                              pack, pixels))
476          return;
477    }
478 
479    if (needs_integer_signed_unsigned_conversion(ctx, format, type)) {
480       goto fallback;
481    }
482 
483    /* Cache a staging texture for back-to-back ReadPixels, to avoid CPU-GPU
484     * synchronization overhead.
485     */
486    dst = try_cached_readpixels(st, strb,
487                                st_fb_orientation(ctx->ReadBuffer) == Y_0_TOP,
488                                width, height, format, src_format, dst_format);
489    if (dst) {
490       dst_x = x;
491       dst_y = y;
492    } else {
493       /* See if the texture format already matches the format and type,
494        * in which case the memcpy-based fast path will likely be used and
495        * we don't have to blit. */
496       if (_mesa_format_matches_format_and_type(rb->Format, format,
497                                                type, pack->SwapBytes, NULL)) {
498          goto fallback;
499       }
500 
501       dst = blit_to_staging(st, strb,
502                             st_fb_orientation(ctx->ReadBuffer) == Y_0_TOP,
503                             x, y, width, height, format,
504                             src_format, dst_format);
505       if (!dst)
506          goto fallback;
507 
508       dst_x = 0;
509       dst_y = 0;
510    }
511 
512    /* map resources */
513    pixels = _mesa_map_pbo_dest(ctx, pack, pixels);
514 
515    map = pipe_transfer_map_3d(pipe, dst, 0, PIPE_TRANSFER_READ,
516                               dst_x, dst_y, 0, width, height, 1, &tex_xfer);
517    if (!map) {
518       _mesa_unmap_pbo_dest(ctx, pack);
519       pipe_resource_reference(&dst, NULL);
520       goto fallback;
521    }
522 
523    /* memcpy data into a user buffer */
524    {
525       const uint bytesPerRow = width * util_format_get_blocksize(dst_format);
526       const int destStride = _mesa_image_row_stride(pack, width, format, type);
527       char *dest = _mesa_image_address2d(pack, pixels,
528                                          width, height, format,
529                                          type, 0, 0);
530 
531       if (tex_xfer->stride == bytesPerRow && destStride == bytesPerRow) {
532          memcpy(dest, map, bytesPerRow * height);
533       } else {
534          GLuint row;
535 
536          for (row = 0; row < (unsigned) height; row++) {
537             memcpy(dest, map, bytesPerRow);
538             map += tex_xfer->stride;
539             dest += destStride;
540          }
541       }
542    }
543 
544    pipe_transfer_unmap(pipe, tex_xfer);
545    _mesa_unmap_pbo_dest(ctx, pack);
546    pipe_resource_reference(&dst, NULL);
547    return;
548 
549 fallback:
550    _mesa_readpixels(ctx, x, y, width, height, format, type, pack, pixels);
551 }
552 
st_init_readpixels_functions(struct dd_function_table * functions)553 void st_init_readpixels_functions(struct dd_function_table *functions)
554 {
555    functions->ReadPixels = st_ReadPixels;
556 }
557