/* * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * on the rights to use, copy, modify, merge, publish, distribute, sub * license, and/or sell copies of the Software, and to permit persons to whom * the Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice (including the next * paragraph) shall be included in all copies or substantial portions of the * Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * USE OR OTHER DEALINGS IN THE SOFTWARE. * * Authors: * Adam Rak */ #include "pipe/p_defines.h" #include "pipe/p_state.h" #include "pipe/p_context.h" #include "util/u_blitter.h" #include "util/u_double_list.h" #include "util/u_transfer.h" #include "util/u_surface.h" #include "util/u_pack_color.h" #include "util/u_memory.h" #include "util/u_inlines.h" #include "util/u_framebuffer.h" #include "r600.h" #include "r600_resource.h" #include "r600_shader.h" #include "r600_pipe.h" #include "r600_formats.h" #include "compute_memory_pool.h" #include "evergreen_compute_internal.h" static struct r600_texture * create_pool_texture(struct r600_screen * screen, unsigned size_in_dw) { struct pipe_resource templ; struct r600_texture * tex; if (size_in_dw == 0) { return NULL; } memset(&templ, 0, sizeof(templ)); templ.target = PIPE_TEXTURE_1D; templ.format = PIPE_FORMAT_R32_UINT; templ.bind = PIPE_BIND_CUSTOM; templ.usage = PIPE_USAGE_IMMUTABLE; templ.flags = 0; templ.width0 = size_in_dw; templ.height0 = 1; templ.depth0 = 1; templ.array_size = 1; tex = (struct r600_texture *)r600_texture_create( &screen->screen, &templ); /* XXX: Propagate this error */ assert(tex && "Out of memory"); tex->is_rat = 1; return tex; } /** * Creates a new pool */ struct compute_memory_pool* compute_memory_pool_new( struct r600_screen * rscreen) { struct compute_memory_pool* pool = (struct compute_memory_pool*) CALLOC(sizeof(struct compute_memory_pool), 1); COMPUTE_DBG("* compute_memory_pool_new()\n"); pool->screen = rscreen; return pool; } static void compute_memory_pool_init(struct compute_memory_pool * pool, unsigned initial_size_in_dw) { COMPUTE_DBG("* compute_memory_pool_init() initial_size_in_dw = %ld\n", initial_size_in_dw); /* XXX: pool->shadow is used when the buffer needs to be resized, but * resizing does not work at the moment. * pool->shadow = (uint32_t*)CALLOC(4, pool->size_in_dw); */ pool->next_id = 1; pool->size_in_dw = initial_size_in_dw; pool->bo = (struct r600_resource*)create_pool_texture(pool->screen, pool->size_in_dw); } /** * Frees all stuff in the pool and the pool struct itself too */ void compute_memory_pool_delete(struct compute_memory_pool* pool) { COMPUTE_DBG("* compute_memory_pool_delete()\n"); free(pool->shadow); if (pool->bo) { pool->screen->screen.resource_destroy((struct pipe_screen *) pool->screen, (struct pipe_resource *)pool->bo); } free(pool); } /** * Searches for an empty space in the pool, return with the pointer to the * allocatable space in the pool, returns -1 on failure. */ int64_t compute_memory_prealloc_chunk( struct compute_memory_pool* pool, int64_t size_in_dw) { assert(size_in_dw <= pool->size_in_dw); struct compute_memory_item *item; int last_end = 0; COMPUTE_DBG("* compute_memory_prealloc_chunk() size_in_dw = %ld\n", size_in_dw); for (item = pool->item_list; item; item = item->next) { if (item->start_in_dw > -1) { if (item->start_in_dw-last_end > size_in_dw) { return last_end; } last_end = item->start_in_dw + item->size_in_dw; last_end += (1024 - last_end % 1024); } } if (pool->size_in_dw - last_end < size_in_dw) { return -1; } return last_end; } /** * Search for the chunk where we can link our new chunk after it. */ struct compute_memory_item* compute_memory_postalloc_chunk( struct compute_memory_pool* pool, int64_t start_in_dw) { struct compute_memory_item* item; COMPUTE_DBG("* compute_memory_postalloc_chunck() start_in_dw = %ld\n", start_in_dw); for (item = pool->item_list; item; item = item->next) { if (item->next) { if (item->start_in_dw < start_in_dw && item->next->start_in_dw > start_in_dw) { return item; } } else { /* end of chain */ assert(item->start_in_dw < start_in_dw); return item; } } assert(0 && "unreachable"); return NULL; } /** * Reallocates pool, conserves data */ void compute_memory_grow_pool(struct compute_memory_pool* pool, struct pipe_context * pipe, int new_size_in_dw) { COMPUTE_DBG("* compute_memory_grow_pool() new_size_in_dw = %d\n", new_size_in_dw); assert(new_size_in_dw >= pool->size_in_dw); assert(!pool->bo && "Growing the global memory pool is not yet " "supported. You will see this message if you are trying to" "use more than 64 kb of memory"); if (!pool->bo) { compute_memory_pool_init(pool, 1024 * 16); } else { /* XXX: Growing memory pools does not work at the moment. I think * it is because we are using fragment shaders to copy data to * the new texture and some of the compute registers are being * included in the 3D command stream. */ fprintf(stderr, "Warning: growing the global memory pool to" "more than 64 kb is not yet supported\n"); new_size_in_dw += 1024 - (new_size_in_dw % 1024); COMPUTE_DBG(" Aligned size = %d\n", new_size_in_dw); compute_memory_shadow(pool, pipe, 1); pool->shadow = (uint32_t*)realloc(pool->shadow, new_size_in_dw*4); pool->size_in_dw = new_size_in_dw; pool->screen->screen.resource_destroy( (struct pipe_screen *)pool->screen, (struct pipe_resource *)pool->bo); pool->bo = (struct r600_resource*)create_pool_texture( pool->screen, pool->size_in_dw); compute_memory_shadow(pool, pipe, 0); } } /** * Copy pool from device to host, or host to device. */ void compute_memory_shadow(struct compute_memory_pool* pool, struct pipe_context * pipe, int device_to_host) { struct compute_memory_item chunk; COMPUTE_DBG("* compute_memory_shadow() device_to_host = %d\n", device_to_host); chunk.id = 0; chunk.start_in_dw = 0; chunk.size_in_dw = pool->size_in_dw; chunk.prev = chunk.next = NULL; compute_memory_transfer(pool, pipe, device_to_host, &chunk, pool->shadow, 0, pool->size_in_dw*4); } /** * Allocates pending allocations in the pool */ void compute_memory_finalize_pending(struct compute_memory_pool* pool, struct pipe_context * pipe) { struct compute_memory_item *pending_list = NULL, *end_p = NULL; struct compute_memory_item *item, *next; int64_t allocated = 0; int64_t unallocated = 0; COMPUTE_DBG("* compute_memory_finalize_pending()\n"); for (item = pool->item_list; item; item = item->next) { COMPUTE_DBG("list: %i %p\n", item->start_in_dw, item->next); } for (item = pool->item_list; item; item = next) { next = item->next; if (item->start_in_dw == -1) { if (end_p) { end_p->next = item; } else { pending_list = item; } if (item->prev) { item->prev->next = next; } else { pool->item_list = next; } if (next) { next->prev = item->prev; } item->prev = end_p; item->next = NULL; end_p = item; unallocated += item->size_in_dw+1024; } else { allocated += item->size_in_dw; } } if (pool->size_in_dw < allocated+unallocated) { compute_memory_grow_pool(pool, pipe, allocated+unallocated); } for (item = pending_list; item; item = next) { next = item->next; int64_t start_in_dw; while ((start_in_dw=compute_memory_prealloc_chunk(pool, item->size_in_dw)) == -1) { int64_t need = item->size_in_dw+2048 - (pool->size_in_dw - allocated); need += 1024 - (need % 1024); if (need > 0) { compute_memory_grow_pool(pool, pipe, pool->size_in_dw + need); } else { need = pool->size_in_dw / 10; need += 1024 - (need % 1024); compute_memory_grow_pool(pool, pipe, pool->size_in_dw + need); } } item->start_in_dw = start_in_dw; item->next = NULL; item->prev = NULL; if (pool->item_list) { struct compute_memory_item *pos; pos = compute_memory_postalloc_chunk(pool, start_in_dw); item->prev = pos; item->next = pos->next; pos->next = item; if (item->next) { item->next->prev = item; } } else { pool->item_list = item; } allocated += item->size_in_dw; } } void compute_memory_free(struct compute_memory_pool* pool, int64_t id) { struct compute_memory_item *item, *next; COMPUTE_DBG("* compute_memory_free() id + %ld \n", id); for (item = pool->item_list; item; item = next) { next = item->next; if (item->id == id) { if (item->prev) { item->prev->next = item->next; } else { pool->item_list = item->next; } if (item->next) { item->next->prev = item->prev; } free(item); return; } } fprintf(stderr, "Internal error, invalid id %ld " "for compute_memory_free\n", id); assert(0 && "error"); } /** * Creates pending allocations */ struct compute_memory_item* compute_memory_alloc( struct compute_memory_pool* pool, int64_t size_in_dw) { struct compute_memory_item *new_item; COMPUTE_DBG("* compute_memory_alloc() size_in_dw = %ld\n", size_in_dw); new_item = (struct compute_memory_item *) CALLOC(sizeof(struct compute_memory_item), 1); new_item->size_in_dw = size_in_dw; new_item->start_in_dw = -1; /* mark pending */ new_item->id = pool->next_id++; new_item->pool = pool; struct compute_memory_item *last_item; if (pool->item_list) { for (last_item = pool->item_list; last_item->next; last_item = last_item->next); last_item->next = new_item; new_item->prev = last_item; } else { pool->item_list = new_item; } return new_item; } /** * Transfer data host<->device, offset and size is in bytes */ void compute_memory_transfer( struct compute_memory_pool* pool, struct pipe_context * pipe, int device_to_host, struct compute_memory_item* chunk, void* data, int offset_in_chunk, int size) { int64_t aligned_size = pool->size_in_dw; struct pipe_resource* gart = (struct pipe_resource*)pool->bo; int64_t internal_offset = chunk->start_in_dw*4 + offset_in_chunk; struct pipe_transfer *xfer; uint32_t *map; assert(gart); COMPUTE_DBG("* compute_memory_transfer() device_to_host = %d, " "offset_in_chunk = %d, size = %d\n", device_to_host, offset_in_chunk, size); if (device_to_host) { xfer = pipe->get_transfer(pipe, gart, 0, PIPE_TRANSFER_READ, &(struct pipe_box) { .width = aligned_size, .height = 1, .depth = 1 }); assert(xfer); map = pipe->transfer_map(pipe, xfer); assert(map); memcpy(data, map + internal_offset, size); pipe->transfer_unmap(pipe, xfer); pipe->transfer_destroy(pipe, xfer); } else { xfer = pipe->get_transfer(pipe, gart, 0, PIPE_TRANSFER_WRITE, &(struct pipe_box) { .width = aligned_size, .height = 1, .depth = 1 }); assert(xfer); map = pipe->transfer_map(pipe, xfer); assert(map); memcpy(map + internal_offset, data, size); pipe->transfer_unmap(pipe, xfer); pipe->transfer_destroy(pipe, xfer); } } /** * Transfer data between chunk<->data, it is for VRAM<->GART transfers */ void compute_memory_transfer_direct( struct compute_memory_pool* pool, int chunk_to_data, struct compute_memory_item* chunk, struct r600_resource* data, int offset_in_chunk, int offset_in_data, int size) { ///TODO: DMA }