1 /**************************************************************************
2  *
3  * Copyright 2010 VMware, Inc.  All Rights Reserved.
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the
7  * "Software"), to deal in the Software without restriction, including
8  * without limitation the rights to use, copy, modify, merge, publish,
9  * distribute, sub license, and/or sell copies of the Software, and to
10  * permit persons to whom the Software is furnished to do so, subject to
11  * the following conditions:
12  *
13  * The above copyright notice and this permission notice (including the
14  * next paragraph) shall be included in all copies or substantial portions
15  * of the Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
20  * IN NO EVENT SHALL THE AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR
21  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
22  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
23  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24  *
25  **************************************************************************/
26 
27 
28 /**
29  * Code to convert images from tiled to linear and back.
30  * XXX there are quite a few assumptions about color and z/stencil being
31  * 32bpp.
32  */
33 
34 
35 #include "util/u_format.h"
36 #include "util/u_memory.h"
37 #include "lp_tile_soa.h"
38 #include "lp_tile_image.h"
39 
40 
/**
 * Size in bytes of one TILE_SIZE x TILE_SIZE tile at 4 bytes per pixel.
 * This is the same quantity the color paths below compute locally as
 * tile_w * tile_h * bpp.
 */
#define BYTES_PER_TILE (TILE_SIZE * TILE_SIZE * 4)
42 
43 
/**
 * Copy one tiled 4x4 block of 32-bit words out to a linear image.
 *
 * The 16 source words are contiguous and arranged as four 2x2 quads
 * (top-left, top-right, bottom-left, bottom-right); within each quad the
 * pixels are stored row by row.
 *
 * \param src         the 16 contiguous tiled words
 * \param dst         address of the block's top-left word in the linear image
 * \param dst_stride  distance between consecutive dst rows, in 32-bit words
 */
static void
untile_4_4_uint32(const uint32_t *src, uint32_t *dst, unsigned dst_stride)
{
   /* tiled_index[row][col] is the position in src of linear pixel (row, col) */
   static const unsigned char tiled_index[4][4] = {
      {  0,  1,  4,  5 },
      {  2,  3,  6,  7 },
      {  8,  9, 12, 13 },
      { 10, 11, 14, 15 }
   };
   uint32_t *row_ptr[4];
   unsigned row, col;

   /* each destination row starts dst_stride words after the previous one */
   row_ptr[0] = dst;
   for (row = 1; row < 4; row++) {
      row_ptr[row] = row_ptr[row - 1] + dst_stride;
   }

   for (row = 0; row < 4; row++) {
      for (col = 0; col < 4; col++) {
         row_ptr[row][col] = src[tiled_index[row][col]];
      }
   }
}
61 
62 
63 
/**
 * Copy one tiled 4x4 block of 16-bit words out to a linear image.
 *
 * The 16 source words are contiguous and arranged as four 2x2 quads
 * (top-left, top-right, bottom-left, bottom-right); within each quad the
 * pixels are stored row by row.
 *
 * \param src         the 16 contiguous tiled words
 * \param dst         address of the block's top-left word in the linear image
 * \param dst_stride  distance between consecutive dst rows, in 16-bit words
 */
static void
untile_4_4_uint16(const uint16_t *src, uint16_t *dst, unsigned dst_stride)
{
   /* tiled_index[row][col] is the position in src of linear pixel (row, col) */
   static const unsigned char tiled_index[4][4] = {
      {  0,  1,  4,  5 },
      {  2,  3,  6,  7 },
      {  8,  9, 12, 13 },
      { 10, 11, 14, 15 }
   };
   uint16_t *row_ptr[4];
   unsigned row, col;

   /* each destination row starts dst_stride words after the previous one */
   row_ptr[0] = dst;
   for (row = 1; row < 4; row++) {
      row_ptr[row] = row_ptr[row - 1] + dst_stride;
   }

   for (row = 0; row < 4; row++) {
      for (col = 0; col < 4; col++) {
         row_ptr[row][col] = src[tiled_index[row][col]];
      }
   }
}
81 
82 
83 
/**
 * Gather a 4x4 rectangle of 32-bit words from a linear image into tiled
 * layout, in which all 16 words are contiguous.
 *
 * The tiled block is made of four 2x2 quads (top-left, top-right,
 * bottom-left, bottom-right), each holding its pixels row by row.
 *
 * \param src         address of the rectangle's top-left word in the linear
 *                    image
 * \param dst         receives the 16 contiguous tiled words
 * \param src_stride  distance between consecutive src rows, in 32-bit words
 */
static void
tile_4_4_uint32(const uint32_t *src, uint32_t *dst, unsigned src_stride)
{
   const uint32_t *row_ptr[4];
   unsigned row, col;

   /* each source row starts src_stride words after the previous one */
   row_ptr[0] = src;
   for (row = 1; row < 4; row++) {
      row_ptr[row] = row_ptr[row - 1] + src_stride;
   }

   for (row = 0; row < 4; row++) {
      for (col = 0; col < 4; col++) {
         /* which 2x2 quad the pixel is in, and its slot inside that quad */
         const unsigned quad = (row >> 1) * 2 + (col >> 1);
         const unsigned slot = (row & 1) * 2 + (col & 1);

         dst[quad * 4 + slot] = row_ptr[row][col];
      }
   }
}
101 
102 
103 
/**
 * Gather a 4x4 rectangle of 16-bit words from a linear image into tiled
 * layout, in which all 16 words are contiguous.
 *
 * The tiled block is made of four 2x2 quads (top-left, top-right,
 * bottom-left, bottom-right), each holding its pixels row by row.
 *
 * \param src         address of the rectangle's top-left word in the linear
 *                    image
 * \param dst         receives the 16 contiguous tiled words
 * \param src_stride  distance between consecutive src rows, in 16-bit words
 */
static void
tile_4_4_uint16(const uint16_t *src, uint16_t *dst, unsigned src_stride)
{
   const uint16_t *row_ptr[4];
   unsigned row, col;

   /* each source row starts src_stride words after the previous one */
   row_ptr[0] = src;
   for (row = 1; row < 4; row++) {
      row_ptr[row] = row_ptr[row - 1] + src_stride;
   }

   for (row = 0; row < 4; row++) {
      for (col = 0; col < 4; col++) {
         /* which 2x2 quad the pixel is in, and its slot inside that quad */
         const unsigned quad = (row >> 1) * 2 + (col >> 1);
         const unsigned slot = (row & 1) * 2 + (col & 1);

         dst[quad * 4 + slot] = row_ptr[row][col];
      }
   }
}
121 
122 
123 
124 /**
125  * Convert a tiled image into a linear image.
126  * \param dst_stride  dest row stride in bytes
127  */
128 void
lp_tiled_to_linear(const void * src,void * dst,unsigned x,unsigned y,unsigned width,unsigned height,enum pipe_format format,unsigned dst_stride,unsigned tiles_per_row)129 lp_tiled_to_linear(const void *src, void *dst,
130                    unsigned x, unsigned y,
131                    unsigned width, unsigned height,
132                    enum pipe_format format,
133                    unsigned dst_stride,
134                    unsigned tiles_per_row)
135 {
136    assert(x % TILE_SIZE == 0);
137    assert(y % TILE_SIZE == 0);
138    /*assert(width % TILE_SIZE == 0);
139      assert(height % TILE_SIZE == 0);*/
140 
141    /* Note that Z/stencil surfaces use a different tiling size than
142     * color surfaces.
143     */
144    if (util_format_is_depth_or_stencil(format)) {
145       const uint bpp = util_format_get_blocksize(format);
146       const uint src_stride = dst_stride * TILE_VECTOR_WIDTH;
147       const uint tile_w = TILE_VECTOR_WIDTH, tile_h = TILE_VECTOR_HEIGHT;
148       const uint tiles_per_row = src_stride / (tile_w * tile_h * bpp);
149 
150       dst_stride /= bpp;   /* convert from bytes to words */
151 
152       if (bpp == 4) {
153          const uint32_t *src32 = (const uint32_t *) src;
154          uint32_t *dst32 = (uint32_t *) dst;
155          uint i, j;
156 
157          for (j = 0; j < height; j += tile_h) {
158             for (i = 0; i < width; i += tile_w) {
159                /* compute offsets in 32-bit words */
160                uint ii = i + x, jj = j + y;
161                uint src_offset = (jj / tile_h * tiles_per_row + ii / tile_w)
162                   * (tile_w * tile_h);
163                uint dst_offset = jj * dst_stride + ii;
164                untile_4_4_uint32(src32 + src_offset,
165                                  dst32 + dst_offset,
166                                  dst_stride);
167             }
168          }
169       }
170       else {
171          const uint16_t *src16 = (const uint16_t *) src;
172          uint16_t *dst16 = (uint16_t *) dst;
173          uint i, j;
174 
175          assert(bpp == 2);
176 
177          for (j = 0; j < height; j += tile_h) {
178             for (i = 0; i < width; i += tile_w) {
179                /* compute offsets in 16-bit words */
180                uint ii = i + x, jj = j + y;
181                uint src_offset = (jj / tile_h * tiles_per_row + ii / tile_w)
182                   * (tile_w * tile_h);
183                uint dst_offset = jj * dst_stride + ii;
184                untile_4_4_uint16(src16 + src_offset,
185                                  dst16 + dst_offset,
186                                  dst_stride);
187             }
188          }
189       }
190    }
191    else {
192       /* color image */
193       const uint bpp = 4;
194       const uint tile_w = TILE_SIZE, tile_h = TILE_SIZE;
195       const uint bytes_per_tile = tile_w * tile_h * bpp;
196       uint i, j;
197 
198       for (j = 0; j < height; j += tile_h) {
199          for (i = 0; i < width; i += tile_w) {
200             uint ii = i + x, jj = j + y;
201             uint tile_offset = ((jj / tile_h) * tiles_per_row + ii / tile_w);
202             uint byte_offset = tile_offset * bytes_per_tile;
203             const uint8_t *src_tile = (uint8_t *) src + byte_offset;
204 
205             lp_tile_unswizzle_4ub(format,
206                               src_tile,
207                               dst, dst_stride,
208                               ii, jj);
209          }
210       }
211    }
212 }
213 
214 
215 /**
216  * Convert a linear image into a tiled image.
217  * \param src_stride  source row stride in bytes
218  */
219 void
lp_linear_to_tiled(const void * src,void * dst,unsigned x,unsigned y,unsigned width,unsigned height,enum pipe_format format,unsigned src_stride,unsigned tiles_per_row)220 lp_linear_to_tiled(const void *src, void *dst,
221                    unsigned x, unsigned y,
222                    unsigned width, unsigned height,
223                    enum pipe_format format,
224                    unsigned src_stride,
225                    unsigned tiles_per_row)
226 {
227    assert(x % TILE_SIZE == 0);
228    assert(y % TILE_SIZE == 0);
229    /*
230    assert(width % TILE_SIZE == 0);
231    assert(height % TILE_SIZE == 0);
232    */
233 
234    if (util_format_is_depth_or_stencil(format)) {
235       const uint bpp = util_format_get_blocksize(format);
236       const uint dst_stride = src_stride * TILE_VECTOR_WIDTH;
237       const uint tile_w = TILE_VECTOR_WIDTH, tile_h = TILE_VECTOR_HEIGHT;
238       const uint tiles_per_row = dst_stride / (tile_w * tile_h * bpp);
239 
240       src_stride /= bpp;   /* convert from bytes to words */
241 
242       if (bpp == 4) {
243          const uint32_t *src32 = (const uint32_t *) src;
244          uint32_t *dst32 = (uint32_t *) dst;
245          uint i, j;
246 
247          for (j = 0; j < height; j += tile_h) {
248             for (i = 0; i < width; i += tile_w) {
249                /* compute offsets in 32-bit words */
250                uint ii = i + x, jj = j + y;
251                uint src_offset = jj * src_stride + ii;
252                uint dst_offset = (jj / tile_h * tiles_per_row + ii / tile_w)
253                   * (tile_w * tile_h);
254                tile_4_4_uint32(src32 + src_offset,
255                                dst32 + dst_offset,
256                                src_stride);
257             }
258          }
259       }
260       else {
261          const uint16_t *src16 = (const uint16_t *) src;
262          uint16_t *dst16 = (uint16_t *) dst;
263          uint i, j;
264 
265          assert(bpp == 2);
266 
267          for (j = 0; j < height; j += tile_h) {
268             for (i = 0; i < width; i += tile_w) {
269                /* compute offsets in 16-bit words */
270                uint ii = i + x, jj = j + y;
271                uint src_offset = jj * src_stride + ii;
272                uint dst_offset = (jj / tile_h * tiles_per_row + ii / tile_w)
273                   * (tile_w * tile_h);
274                tile_4_4_uint16(src16 + src_offset,
275                                dst16 + dst_offset,
276                                src_stride);
277             }
278          }
279       }
280    }
281    else {
282       const uint bpp = 4;
283       const uint tile_w = TILE_SIZE, tile_h = TILE_SIZE;
284       const uint bytes_per_tile = tile_w * tile_h * bpp;
285       uint i, j;
286 
287       for (j = 0; j < height; j += TILE_SIZE) {
288          for (i = 0; i < width; i += TILE_SIZE) {
289             uint ii = i + x, jj = j + y;
290             uint tile_offset = ((jj / tile_h) * tiles_per_row + ii / tile_w);
291             uint byte_offset = tile_offset * bytes_per_tile;
292             uint8_t *dst_tile = (uint8_t *) dst + byte_offset;
293 
294             lp_tile_swizzle_4ub(format,
295                              dst_tile,
296                              src, src_stride,
297                              ii, jj);
298          }
299       }
300    }
301 }
302 
303 
304 /**
305  * For testing only.
306  */
307 void
test_tiled_linear_conversion(void * data,enum pipe_format format,unsigned width,unsigned height,unsigned stride)308 test_tiled_linear_conversion(void *data,
309                              enum pipe_format format,
310                              unsigned width, unsigned height,
311                              unsigned stride)
312 {
313    /* size in tiles */
314    unsigned wt = (width + TILE_SIZE - 1) / TILE_SIZE;
315    unsigned ht = (height + TILE_SIZE - 1) / TILE_SIZE;
316 
317    uint8_t *tiled = MALLOC(wt * ht * TILE_SIZE * TILE_SIZE * 4);
318 
319    /*unsigned tiled_stride = wt * TILE_SIZE * TILE_SIZE * 4;*/
320 
321    lp_linear_to_tiled(data, tiled, 0, 0, width, height, format,
322                       stride, wt);
323 
324    lp_tiled_to_linear(tiled, data, 0, 0, width, height, format,
325                       stride, wt);
326 
327    FREE(tiled);
328 }
329 
330