1 /**************************************************************************
2  *
3  * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the
8  * "Software"), to deal in the Software without restriction, including
9  * without limitation the rights to use, copy, modify, merge, publish,
10  * distribute, sub license, and/or sell copies of the Software, and to
11  * permit persons to whom the Software is furnished to do so, subject to
12  * the following conditions:
13  *
14  * The above copyright notice and this permission notice (including the
15  * next paragraph) shall be included in all copies or substantial portions
16  * of the Software.
17  *
18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21  * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25  *
26  **************************************************************************/
27 
28 /* Provide additional functionality on top of bufmgr buffers:
29  *   - 2d semantics and blit operations
30  *   - refcounting of buffers for multiple images in a buffer.
31  *   - refcounting of buffer mappings.
32  *   - some logic for moving the buffers to the best memory pools for
33  *     given operations.
34  *
35  * Most of this is to make it easier to implement the fixed-layout
36  * mipmap tree required by intel hardware in the face of GL's
 * programming interface where each image can be specified in random
38  * order and it isn't clear what layout the tree should have until the
39  * last moment.
40  */
41 
42 #include <sys/ioctl.h>
43 #include <errno.h>
44 
45 #include "main/hash.h"
46 #include "intel_context.h"
47 #include "intel_regions.h"
48 #include "intel_blit.h"
49 #include "intel_buffer_objects.h"
50 #include "intel_bufmgr.h"
51 #include "intel_batchbuffer.h"
52 
53 #define FILE_DEBUG_FLAG DEBUG_REGION
54 
55 /* This should be set to the maximum backtrace size desired.
56  * Set it to 0 to disable backtrace debugging.
57  */
58 #define DEBUG_BACKTRACE_SIZE 0
59 
60 #if DEBUG_BACKTRACE_SIZE == 0
61 /* Use the standard debug output */
62 #define _DBG(...) DBG(__VA_ARGS__)
63 #else
64 /* Use backtracing debug output */
65 #define _DBG(...) {debug_backtrace(); DBG(__VA_ARGS__);}
66 
67 /* Backtracing debug support */
68 #include <execinfo.h>
69 
70 static void
debug_backtrace(void)71 debug_backtrace(void)
72 {
73    void *trace[DEBUG_BACKTRACE_SIZE];
74    char **strings = NULL;
75    int traceSize;
76    register int i;
77 
78    traceSize = backtrace(trace, DEBUG_BACKTRACE_SIZE);
79    strings = backtrace_symbols(trace, traceSize);
80    if (strings == NULL) {
81       DBG("no backtrace:");
82       return;
83    }
84 
85    /* Spit out all the strings with a colon separator.  Ignore
86     * the first, since we don't really care about the call
87     * to debug_backtrace() itself.  Skip until the final "/" in
88     * the trace to avoid really long lines.
89     */
90    for (i = 1; i < traceSize; i++) {
91       char *p = strings[i], *slash = strings[i];
92       while (*p) {
93          if (*p++ == '/') {
94             slash = p;
95          }
96       }
97 
98       DBG("%s:", slash);
99    }
100 
101    /* Free up the memory, and we're done */
102    free(strings);
103 }
104 
105 #endif
106 
107 
108 
109 /* XXX: Thread safety?
110  */
111 void *
intel_region_map(struct intel_context * intel,struct intel_region * region,GLbitfield mode)112 intel_region_map(struct intel_context *intel, struct intel_region *region,
113                  GLbitfield mode)
114 {
115    /* We have the region->map_refcount controlling mapping of the BO because
116     * in software fallbacks we may end up mapping the same buffer multiple
117     * times on Mesa's behalf, so we refcount our mappings to make sure that
118     * the pointer stays valid until the end of the unmap chain.  However, we
119     * must not emit any batchbuffers between the start of mapping and the end
120     * of unmapping, or further use of the map will be incoherent with the GPU
121     * rendering done by that batchbuffer. Hence we assert in
122     * intel_batchbuffer_flush() that that doesn't happen, which means that the
123     * flush is only needed on first map of the buffer.
124     */
125 
126    if (unlikely(INTEL_DEBUG & DEBUG_PERF)) {
127       if (drm_intel_bo_busy(region->bo)) {
128          perf_debug("Mapping a busy BO, causing a stall on the GPU.\n");
129       }
130    }
131 
132    _DBG("%s %p\n", __FUNCTION__, region);
133    if (!region->map_refcount) {
134       intel_flush(&intel->ctx);
135 
136       if (region->tiling != I915_TILING_NONE)
137 	 drm_intel_gem_bo_map_gtt(region->bo);
138       else
139 	 drm_intel_bo_map(region->bo, true);
140 
141       region->map = region->bo->virtual;
142    }
143    if (region->map) {
144       intel->num_mapped_regions++;
145       region->map_refcount++;
146    }
147 
148    return region->map;
149 }
150 
151 void
intel_region_unmap(struct intel_context * intel,struct intel_region * region)152 intel_region_unmap(struct intel_context *intel, struct intel_region *region)
153 {
154    _DBG("%s %p\n", __FUNCTION__, region);
155    if (!--region->map_refcount) {
156       if (region->tiling != I915_TILING_NONE)
157 	 drm_intel_gem_bo_unmap_gtt(region->bo);
158       else
159 	 drm_intel_bo_unmap(region->bo);
160 
161       region->map = NULL;
162       --intel->num_mapped_regions;
163       assert(intel->num_mapped_regions >= 0);
164    }
165 }
166 
167 static struct intel_region *
intel_region_alloc_internal(struct intel_screen * screen,GLuint cpp,GLuint width,GLuint height,GLuint pitch,uint32_t tiling,drm_intel_bo * buffer)168 intel_region_alloc_internal(struct intel_screen *screen,
169 			    GLuint cpp,
170 			    GLuint width, GLuint height, GLuint pitch,
171 			    uint32_t tiling, drm_intel_bo *buffer)
172 {
173    struct intel_region *region;
174 
175    region = calloc(sizeof(*region), 1);
176    if (region == NULL)
177       return region;
178 
179    region->cpp = cpp;
180    region->width = width;
181    region->height = height;
182    region->pitch = pitch;
183    region->refcount = 1;
184    region->bo = buffer;
185    region->tiling = tiling;
186    region->screen = screen;
187 
188    _DBG("%s <-- %p\n", __FUNCTION__, region);
189    return region;
190 }
191 
192 struct intel_region *
intel_region_alloc(struct intel_screen * screen,uint32_t tiling,GLuint cpp,GLuint width,GLuint height,bool expect_accelerated_upload)193 intel_region_alloc(struct intel_screen *screen,
194 		   uint32_t tiling,
195                    GLuint cpp, GLuint width, GLuint height,
196 		   bool expect_accelerated_upload)
197 {
198    drm_intel_bo *buffer;
199    unsigned long flags = 0;
200    unsigned long aligned_pitch;
201    struct intel_region *region;
202 
203    if (expect_accelerated_upload)
204       flags |= BO_ALLOC_FOR_RENDER;
205 
206    buffer = drm_intel_bo_alloc_tiled(screen->bufmgr, "region",
207 				     width, height, cpp,
208 				     &tiling, &aligned_pitch, flags);
209    if (buffer == NULL)
210       return NULL;
211 
212    region = intel_region_alloc_internal(screen, cpp, width, height,
213                                         aligned_pitch / cpp, tiling, buffer);
214    if (region == NULL) {
215       drm_intel_bo_unreference(buffer);
216       return NULL;
217    }
218 
219    return region;
220 }
221 
222 bool
intel_region_flink(struct intel_region * region,uint32_t * name)223 intel_region_flink(struct intel_region *region, uint32_t *name)
224 {
225    if (region->name == 0) {
226       if (drm_intel_bo_flink(region->bo, &region->name))
227 	 return false;
228 
229       _mesa_HashInsert(region->screen->named_regions,
230 		       region->name, region);
231    }
232 
233    *name = region->name;
234 
235    return true;
236 }
237 
238 struct intel_region *
intel_region_alloc_for_handle(struct intel_screen * screen,GLuint cpp,GLuint width,GLuint height,GLuint pitch,GLuint handle,const char * name)239 intel_region_alloc_for_handle(struct intel_screen *screen,
240 			      GLuint cpp,
241 			      GLuint width, GLuint height, GLuint pitch,
242 			      GLuint handle, const char *name)
243 {
244    struct intel_region *region, *dummy;
245    drm_intel_bo *buffer;
246    int ret;
247    uint32_t bit_6_swizzle, tiling;
248 
249    region = _mesa_HashLookup(screen->named_regions, handle);
250    if (region != NULL) {
251       dummy = NULL;
252       if (region->width != width || region->height != height ||
253 	  region->cpp != cpp || region->pitch != pitch) {
254 	 fprintf(stderr,
255 		 "Region for name %d already exists but is not compatible\n",
256 		 handle);
257 	 return NULL;
258       }
259       intel_region_reference(&dummy, region);
260       return dummy;
261    }
262 
263    buffer = intel_bo_gem_create_from_name(screen->bufmgr, name, handle);
264    if (buffer == NULL)
265       return NULL;
266    ret = drm_intel_bo_get_tiling(buffer, &tiling, &bit_6_swizzle);
267    if (ret != 0) {
268       fprintf(stderr, "Couldn't get tiling of buffer %d (%s): %s\n",
269 	      handle, name, strerror(-ret));
270       drm_intel_bo_unreference(buffer);
271       return NULL;
272    }
273 
274    region = intel_region_alloc_internal(screen, cpp,
275 					width, height, pitch, tiling, buffer);
276    if (region == NULL) {
277       drm_intel_bo_unreference(buffer);
278       return NULL;
279    }
280 
281    region->name = handle;
282    _mesa_HashInsert(screen->named_regions, handle, region);
283 
284    return region;
285 }
286 
287 void
intel_region_reference(struct intel_region ** dst,struct intel_region * src)288 intel_region_reference(struct intel_region **dst, struct intel_region *src)
289 {
290    _DBG("%s: %p(%d) -> %p(%d)\n", __FUNCTION__,
291 	*dst, *dst ? (*dst)->refcount : 0, src, src ? src->refcount : 0);
292 
293    if (src != *dst) {
294       if (*dst)
295 	 intel_region_release(dst);
296 
297       if (src)
298          src->refcount++;
299       *dst = src;
300    }
301 }
302 
303 void
intel_region_release(struct intel_region ** region_handle)304 intel_region_release(struct intel_region **region_handle)
305 {
306    struct intel_region *region = *region_handle;
307 
308    if (region == NULL) {
309       _DBG("%s NULL\n", __FUNCTION__);
310       return;
311    }
312 
313    _DBG("%s %p %d\n", __FUNCTION__, region, region->refcount - 1);
314 
315    ASSERT(region->refcount > 0);
316    region->refcount--;
317 
318    if (region->refcount == 0) {
319       assert(region->map_refcount == 0);
320 
321       drm_intel_bo_unreference(region->bo);
322 
323       if (region->name > 0)
324 	 _mesa_HashRemove(region->screen->named_regions, region->name);
325 
326       free(region);
327    }
328    *region_handle = NULL;
329 }
330 
/*
 * Copy a width x height rectangle of pixels between two linear images.
 *
 * dst / src              base pointers of the two images
 * cpp                    bytes per pixel
 * dst_pitch / src_pitch  row pitch of each image, in pixels
 * dst_x, dst_y           top-left pixel of the rectangle in dst
 * src_x, src_y           top-left pixel of the rectangle in src
 * width, height          rectangle size, in pixels / rows
 *
 * All byte offsets and sizes are computed in size_t so that they cannot
 * wrap at 32 bits where size_t is wider than GLuint.  The copy uses
 * memcpy(), so the source and destination rectangles must not overlap.
 *
 * XXX Move this into core Mesa?
 */
void
_mesa_copy_rect(GLubyte * dst,
                GLuint cpp,
                GLuint dst_pitch,
                GLuint dst_x,
                GLuint dst_y,
                GLuint width,
                GLuint height,
                const GLubyte * src,
                GLuint src_pitch, GLuint src_x, GLuint src_y)
{
   const size_t dst_stride = (size_t) dst_pitch * cpp;
   const size_t src_stride = (size_t) src_pitch * cpp;
   const size_t row_bytes = (size_t) width * cpp;
   GLuint row;

   dst += (size_t) dst_y * dst_stride + (size_t) dst_x * cpp;
   src += (size_t) src_y * src_stride + (size_t) src_x * cpp;

   if (row_bytes == dst_stride && row_bytes == src_stride) {
      /* Rows are back to back in both images: one contiguous copy. */
      memcpy(dst, src, (size_t) height * row_bytes);
   } else {
      for (row = 0; row < height; row++) {
         memcpy(dst, src, row_bytes);
         dst += dst_stride;
         src += src_stride;
      }
   }
}
365 
366 /* Copy rectangular sub-regions. Need better logic about when to
367  * push buffers into AGP - will currently do so whenever possible.
368  */
369 bool
intel_region_copy(struct intel_context * intel,struct intel_region * dst,GLuint dst_offset,GLuint dstx,GLuint dsty,struct intel_region * src,GLuint src_offset,GLuint srcx,GLuint srcy,GLuint width,GLuint height,bool flip,GLenum logicop)370 intel_region_copy(struct intel_context *intel,
371                   struct intel_region *dst,
372                   GLuint dst_offset,
373                   GLuint dstx, GLuint dsty,
374                   struct intel_region *src,
375                   GLuint src_offset,
376                   GLuint srcx, GLuint srcy, GLuint width, GLuint height,
377 		  bool flip,
378 		  GLenum logicop)
379 {
380    uint32_t src_pitch = src->pitch;
381 
382    _DBG("%s\n", __FUNCTION__);
383 
384    if (intel == NULL)
385       return false;
386 
387    assert(src->cpp == dst->cpp);
388 
389    if (flip)
390       src_pitch = -src_pitch;
391 
392    return intelEmitCopyBlit(intel,
393 			    dst->cpp,
394 			    src_pitch, src->bo, src_offset, src->tiling,
395 			    dst->pitch, dst->bo, dst_offset, dst->tiling,
396 			    srcx, srcy, dstx, dsty, width, height,
397 			    logicop);
398 }
399 
400 /**
401  * This function computes masks that may be used to select the bits of the X
402  * and Y coordinates that indicate the offset within a tile.  If the region is
403  * untiled, the masks are set to 0.
404  */
405 void
intel_region_get_tile_masks(struct intel_region * region,uint32_t * mask_x,uint32_t * mask_y,bool map_stencil_as_y_tiled)406 intel_region_get_tile_masks(struct intel_region *region,
407                             uint32_t *mask_x, uint32_t *mask_y,
408                             bool map_stencil_as_y_tiled)
409 {
410    int cpp = region->cpp;
411    uint32_t tiling = region->tiling;
412 
413    if (map_stencil_as_y_tiled)
414       tiling = I915_TILING_Y;
415 
416    switch (tiling) {
417    default:
418       assert(false);
419    case I915_TILING_NONE:
420       *mask_x = *mask_y = 0;
421       break;
422    case I915_TILING_X:
423       *mask_x = 512 / cpp - 1;
424       *mask_y = 7;
425       break;
426    case I915_TILING_Y:
427       *mask_x = 128 / cpp - 1;
428       *mask_y = 31;
429       break;
430    }
431 }
432 
433 /**
434  * Compute the offset (in bytes) from the start of the region to the given x
435  * and y coordinate.  For tiled regions, caller must ensure that x and y are
436  * multiples of the tile size.
437  */
438 uint32_t
intel_region_get_aligned_offset(struct intel_region * region,uint32_t x,uint32_t y,bool map_stencil_as_y_tiled)439 intel_region_get_aligned_offset(struct intel_region *region, uint32_t x,
440                                 uint32_t y, bool map_stencil_as_y_tiled)
441 {
442    int cpp = region->cpp;
443    uint32_t pitch = region->pitch * cpp;
444    uint32_t tiling = region->tiling;
445 
446    if (map_stencil_as_y_tiled) {
447       tiling = I915_TILING_Y;
448 
449       /* When mapping a W-tiled stencil buffer as Y-tiled, each 64-high W-tile
450        * gets transformed into a 32-high Y-tile.  Accordingly, the pitch of
451        * the resulting region is twice the pitch of the original region, since
452        * each row in the Y-tiled view corresponds to two rows in the actual
453        * W-tiled surface.  So we need to correct the pitch before computing
454        * the offsets.
455        */
456       pitch *= 2;
457    }
458 
459    switch (tiling) {
460    default:
461       assert(false);
462    case I915_TILING_NONE:
463       return y * pitch + x * cpp;
464    case I915_TILING_X:
465       assert((x % (512 / cpp)) == 0);
466       assert((y % 8) == 0);
467       return y * pitch + x / (512 / cpp) * 4096;
468    case I915_TILING_Y:
469       assert((x % (128 / cpp)) == 0);
470       assert((y % 32) == 0);
471       return y * pitch + x / (128 / cpp) * 4096;
472    }
473 }
474