1 /*
2  * Copyright 2012 Red Hat Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
18  * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
19  * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20  * SOFTWARE.
21  *
22  * Authors: Ben Skeggs
23  *
24  */
25 
/* Parameter list shared by the transfer-method predicates and executors in
 * this file, and by the function-pointer types in the dispatch table of
 * nv30_transfer_rect(): the context, the requested filter, and the source
 * and destination rectangles.
 */
#define XFER_ARGS                                                              \
   struct nv30_context *nv30,                                                  \
   enum nv30_transfer_filter filter,                                           \
   struct nv30_rect *src,                                                      \
   struct nv30_rect *dst
29 
30 #include "util/u_math.h"
31 
32 #include "nouveau/nv_object.xml.h"
33 #include "nouveau/nv_m2mf.xml.h"
34 #include "nv01_2d.xml.h"
35 #include "nv30-40_3d.xml.h"
36 
37 #include "nv30_context.h"
38 #include "nv30_transfer.h"
39 
40 /* Various helper functions to transfer different types of data in a number
41  * of different ways.
42  */
43 
44 static INLINE boolean
nv30_transfer_scaled(struct nv30_rect * src,struct nv30_rect * dst)45 nv30_transfer_scaled(struct nv30_rect *src, struct nv30_rect *dst)
46 {
47    if (src->x1 - src->x0 != dst->x1 - dst->x0)
48       return TRUE;
49    if (src->y1 - src->y0 != dst->y1 - dst->y0)
50       return TRUE;
51    return FALSE;
52 }
53 
54 static INLINE boolean
nv30_transfer_blit(XFER_ARGS)55 nv30_transfer_blit(XFER_ARGS)
56 {
57    if (nv30->screen->eng3d->oclass < NV40_3D_CLASS)
58       return FALSE;
59    if (dst->offset & 63 || dst->pitch & 63 || dst->d > 1)
60       return FALSE;
61    if (dst->w < 2 || dst->h < 2)
62       return FALSE;
63    if (dst->cpp > 4 || (dst->cpp == 1 && !dst->pitch))
64       return FALSE;
65    if (src->cpp > 4)
66       return FALSE;
67    return TRUE;
68 }
69 
70 static INLINE struct nouveau_heap *
nv30_transfer_rect_vertprog(struct nv30_context * nv30)71 nv30_transfer_rect_vertprog(struct nv30_context *nv30)
72 {
73    struct nouveau_heap *heap = nv30->screen->vp_exec_heap;
74    struct nouveau_heap *vp;
75 
76    vp = nv30->blit_vp;
77    if (!vp) {
78       if (nouveau_heap_alloc(heap, 2, &nv30->blit_vp, &nv30->blit_vp)) {
79          while (heap->next && heap->size < 2) {
80             struct nouveau_heap **evict = heap->next->priv;
81             nouveau_heap_free(evict);
82          }
83 
84          if (nouveau_heap_alloc(heap, 2, &nv30->blit_vp, &nv30->blit_vp))
85             return NULL;
86       }
87 
88       vp = nv30->blit_vp;
89       if (vp) {
90          struct nouveau_pushbuf *push = nv30->base.pushbuf;
91 
92          BEGIN_NV04(push, NV30_3D(VP_UPLOAD_FROM_ID), 1);
93          PUSH_DATA (push, vp->start);
94          BEGIN_NV04(push, NV30_3D(VP_UPLOAD_INST(0)), 4);
95          PUSH_DATA (push, 0x401f9c6c); /* mov o[hpos], a[0]; */
96          PUSH_DATA (push, 0x0040000d);
97          PUSH_DATA (push, 0x8106c083);
98          PUSH_DATA (push, 0x6041ff80);
99          BEGIN_NV04(push, NV30_3D(VP_UPLOAD_INST(0)), 4);
100          PUSH_DATA (push, 0x401f9c6c); /* mov o[tex0], a[8]; end; */
101          PUSH_DATA (push, 0x0040080d);
102          PUSH_DATA (push, 0x8106c083);
103          PUSH_DATA (push, 0x6041ff9d);
104       }
105    }
106 
107    return vp;
108 }
109 
110 
111 static INLINE struct nv04_resource *
nv30_transfer_rect_fragprog(struct nv30_context * nv30)112 nv30_transfer_rect_fragprog(struct nv30_context *nv30)
113 {
114    struct nv04_resource *fp = nv04_resource(nv30->blit_fp);
115    struct pipe_context *pipe = &nv30->base.pipe;
116 
117    if (!fp) {
118       nv30->blit_fp = pipe_buffer_create(pipe->screen, 0, 0, 12 * 4);
119       if (nv30->blit_fp) {
120          struct pipe_transfer *transfer;
121          u32 *map = pipe_buffer_map(pipe, nv30->blit_fp,
122                                     PIPE_TRANSFER_WRITE, &transfer);
123          if (map) {
124             map[0] = 0x17009e00; /* texr r0, i[tex0], texture[0]; end; */
125             map[1] = 0x1c9dc801;
126             map[2] = 0x0001c800;
127             map[3] = 0x3fe1c800;
128             map[4] = 0x01401e81; /* end; */
129             map[5] = 0x1c9dc800;
130             map[6] = 0x0001c800;
131             map[7] = 0x0001c800;
132             pipe_buffer_unmap(pipe, transfer);
133          }
134 
135          fp = nv04_resource(nv30->blit_fp);
136          nouveau_buffer_migrate(&nv30->base, fp, NOUVEAU_BO_VRAM);
137       }
138    }
139 
140    return fp;
141 }
142 
143 static void
nv30_transfer_rect_blit(XFER_ARGS)144 nv30_transfer_rect_blit(XFER_ARGS)
145 {
146    struct nv04_resource *fp = nv30_transfer_rect_fragprog(nv30);
147    struct nouveau_heap *vp = nv30_transfer_rect_vertprog(nv30);
148    struct nouveau_pushbuf *push = nv30->base.pushbuf;
149    struct nouveau_pushbuf_refn refs[] = {
150       { fp->bo, fp->domain | NOUVEAU_BO_RD },
151       { src->bo, src->domain | NOUVEAU_BO_RD },
152       { dst->bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR },
153    };
154    u32 texfmt, texswz;
155    u32 format, stride;
156 
157    if (nouveau_pushbuf_space(push, 512, 8, 0) ||
158        nouveau_pushbuf_refn (push, refs, sizeof(refs) / sizeof(refs[0])))
159       return;
160 
161    /* various switches depending on cpp of the transfer */
162    switch (dst->cpp) {
163    case 4:
164       format = NV30_3D_RT_FORMAT_COLOR_A8R8G8B8 |
165                NV30_3D_RT_FORMAT_ZETA_Z24S8;
166       texfmt = NV40_3D_TEX_FORMAT_FORMAT_A8R8G8B8;
167       texswz = 0x0000aae4;
168       break;
169    case 2:
170       format = NV30_3D_RT_FORMAT_COLOR_R5G6B5 |
171                NV30_3D_RT_FORMAT_ZETA_Z16;
172       texfmt = NV40_3D_TEX_FORMAT_FORMAT_R5G6B5;
173       texswz = 0x0000a9e4;
174       break;
175    case 1:
176       format = NV30_3D_RT_FORMAT_COLOR_B8 |
177                NV30_3D_RT_FORMAT_ZETA_Z16;
178       texfmt = NV40_3D_TEX_FORMAT_FORMAT_L8;
179       texswz = 0x0000aaff;
180       break;
181    default:
182       assert(0);
183       return;
184    }
185 
186    /* render target */
187    if (!dst->pitch) {
188       format |= NV30_3D_RT_FORMAT_TYPE_SWIZZLED;
189       format |= util_logbase2(dst->w) << 16;
190       format |= util_logbase2(dst->h) << 24;
191       stride  = 64;
192    } else {
193       format |= NV30_3D_RT_FORMAT_TYPE_LINEAR;
194       stride  = dst->pitch;
195    }
196 
197    BEGIN_NV04(push, NV30_3D(VIEWPORT_HORIZ), 2);
198    PUSH_DATA (push, dst->w << 16);
199    PUSH_DATA (push, dst->h << 16);
200    BEGIN_NV04(push, NV30_3D(RT_HORIZ), 5);
201    PUSH_DATA (push, dst->w << 16);
202    PUSH_DATA (push, dst->h << 16);
203    PUSH_DATA (push, format);
204    PUSH_DATA (push, stride);
205    PUSH_RELOC(push, dst->bo, dst->offset, NOUVEAU_BO_LOW, 0, 0);
206    BEGIN_NV04(push, NV30_3D(RT_ENABLE), 1);
207    PUSH_DATA (push, NV30_3D_RT_ENABLE_COLOR0);
208 
209    nv30->dirty |= NV30_NEW_FRAMEBUFFER;
210 
211    /* viewport state */
212    BEGIN_NV04(push, NV30_3D(VIEWPORT_TRANSLATE_X), 8);
213    PUSH_DATAf(push, 0.0);
214    PUSH_DATAf(push, 0.0);
215    PUSH_DATAf(push, 0.0);
216    PUSH_DATAf(push, 0.0);
217    PUSH_DATAf(push, 1.0);
218    PUSH_DATAf(push, 1.0);
219    PUSH_DATAf(push, 1.0);
220    PUSH_DATAf(push, 1.0);
221    BEGIN_NV04(push, NV30_3D(DEPTH_RANGE_NEAR), 2);
222    PUSH_DATAf(push, 0.0);
223    PUSH_DATAf(push, 1.0);
224 
225    nv30->dirty |= NV30_NEW_VIEWPORT;
226 
227    /* blend state */
228    BEGIN_NV04(push, NV30_3D(COLOR_LOGIC_OP_ENABLE), 1);
229    PUSH_DATA (push, 0);
230    BEGIN_NV04(push, NV30_3D(DITHER_ENABLE), 1);
231    PUSH_DATA (push, 0);
232    BEGIN_NV04(push, NV30_3D(BLEND_FUNC_ENABLE), 1);
233    PUSH_DATA (push, 0);
234    BEGIN_NV04(push, NV30_3D(COLOR_MASK), 1);
235    PUSH_DATA (push, 0x01010101);
236 
237    nv30->dirty |= NV30_NEW_BLEND;
238 
239    /* depth-stencil-alpha state */
240    BEGIN_NV04(push, NV30_3D(DEPTH_WRITE_ENABLE), 2);
241    PUSH_DATA (push, 0);
242    PUSH_DATA (push, 0);
243    BEGIN_NV04(push, NV30_3D(STENCIL_ENABLE(0)), 1);
244    PUSH_DATA (push, 0);
245    BEGIN_NV04(push, NV30_3D(STENCIL_ENABLE(1)), 1);
246    PUSH_DATA (push, 0);
247    BEGIN_NV04(push, NV30_3D(ALPHA_FUNC_ENABLE), 1);
248    PUSH_DATA (push, 0);
249 
250    nv30->dirty |= NV30_NEW_ZSA;
251 
252    /* rasterizer state */
253    BEGIN_NV04(push, NV30_3D(SHADE_MODEL), 1);
254    PUSH_DATA (push, NV30_3D_SHADE_MODEL_FLAT);
255    BEGIN_NV04(push, NV30_3D(CULL_FACE_ENABLE), 1);
256    PUSH_DATA (push, 0);
257    BEGIN_NV04(push, NV30_3D(POLYGON_MODE_FRONT), 2);
258    PUSH_DATA (push, NV30_3D_POLYGON_MODE_FRONT_FILL);
259    PUSH_DATA (push, NV30_3D_POLYGON_MODE_BACK_FILL);
260    BEGIN_NV04(push, NV30_3D(POLYGON_OFFSET_FILL_ENABLE), 1);
261    PUSH_DATA (push, 0);
262    BEGIN_NV04(push, NV30_3D(POLYGON_STIPPLE_ENABLE), 1);
263    PUSH_DATA (push, 0);
264 
265    nv30->state.scissor_off = 0;
266    nv30->dirty |= NV30_NEW_RASTERIZER;
267 
268    /* vertex program */
269    BEGIN_NV04(push, NV30_3D(VP_START_FROM_ID), 1);
270    PUSH_DATA (push, vp->start);
271    BEGIN_NV04(push, NV40_3D(VP_ATTRIB_EN), 2);
272    PUSH_DATA (push, 0x00000101); /* attrib: 0, 8 */
273    PUSH_DATA (push, 0x00004000); /* result: hpos, tex0 */
274    BEGIN_NV04(push, NV30_3D(ENGINE), 1);
275    PUSH_DATA (push, 0x00000103);
276    BEGIN_NV04(push, NV30_3D(VP_CLIP_PLANES_ENABLE), 1);
277    PUSH_DATA (push, 0x00000000);
278 
279    nv30->dirty |= NV30_NEW_VERTPROG;
280    nv30->dirty |= NV30_NEW_CLIP;
281 
282    /* fragment program */
283    BEGIN_NV04(push, NV30_3D(FP_ACTIVE_PROGRAM), 1);
284    PUSH_RELOC(push, fp->bo, fp->offset, fp->domain |
285                     NOUVEAU_BO_LOW | NOUVEAU_BO_OR,
286                     NV30_3D_FP_ACTIVE_PROGRAM_DMA0,
287                     NV30_3D_FP_ACTIVE_PROGRAM_DMA1);
288    BEGIN_NV04(push, NV30_3D(FP_CONTROL), 1);
289    PUSH_DATA (push, 0x02000000);
290 
291    nv30->state.fragprog = NULL;
292    nv30->dirty |= NV30_NEW_FRAGPROG;
293 
294    /* texture */
295    texfmt |= 1 << NV40_3D_TEX_FORMAT_MIPMAP_COUNT__SHIFT;
296    texfmt |= NV30_3D_TEX_FORMAT_NO_BORDER;
297    texfmt |= NV40_3D_TEX_FORMAT_RECT;
298    texfmt |= 0x00008000;
299    if (src->d < 2)
300       texfmt |= NV30_3D_TEX_FORMAT_DIMS_2D;
301    else
302       texfmt |= NV30_3D_TEX_FORMAT_DIMS_3D;
303    if (src->pitch)
304       texfmt |= NV40_3D_TEX_FORMAT_LINEAR;
305 
306    BEGIN_NV04(push, NV30_3D(TEX_OFFSET(0)), 8);
307    PUSH_RELOC(push, src->bo, src->offset, NOUVEAU_BO_LOW, 0, 0);
308    PUSH_RELOC(push, src->bo, texfmt, NOUVEAU_BO_OR,
309                     NV30_3D_TEX_FORMAT_DMA0, NV30_3D_TEX_FORMAT_DMA1);
310    PUSH_DATA (push, NV30_3D_TEX_WRAP_S_CLAMP_TO_EDGE |
311                     NV30_3D_TEX_WRAP_T_CLAMP_TO_EDGE |
312                     NV30_3D_TEX_WRAP_R_CLAMP_TO_EDGE);
313    PUSH_DATA (push, NV40_3D_TEX_ENABLE_ENABLE);
314    PUSH_DATA (push, texswz);
315    switch (filter) {
316    case BILINEAR:
317       PUSH_DATA (push, NV30_3D_TEX_FILTER_MIN_LINEAR |
318                        NV30_3D_TEX_FILTER_MAG_LINEAR | 0x00002000);
319       break;
320    default:
321       PUSH_DATA (push, NV30_3D_TEX_FILTER_MIN_NEAREST |
322                        NV30_3D_TEX_FILTER_MAG_NEAREST | 0x00002000);
323       break;
324    }
325    PUSH_DATA (push, (src->w << 16) | src->h);
326    PUSH_DATA (push, 0x00000000);
327    BEGIN_NV04(push, NV40_3D(TEX_SIZE1(0)), 1);
328    PUSH_DATA (push, 0x00100000 | src->pitch);
329    BEGIN_NV04(push, SUBC_3D(0x0b40), 1);
330    PUSH_DATA (push, src->d < 2 ? 0x00000001 : 0x00000000);
331    BEGIN_NV04(push, NV40_3D(TEX_CACHE_CTL), 1);
332    PUSH_DATA (push, 1);
333 
334    nv30->fragprog.dirty_samplers |= 1;
335    nv30->dirty |= NV30_NEW_FRAGTEX;
336 
337    /* blit! */
338    BEGIN_NV04(push, NV30_3D(SCISSOR_HORIZ), 2);
339    PUSH_DATA (push, (dst->x1 - dst->x0) << 16 | dst->x0);
340    PUSH_DATA (push, (dst->y1 - dst->y0) << 16 | dst->y0);
341    BEGIN_NV04(push, NV30_3D(VERTEX_BEGIN_END), 1);
342    PUSH_DATA (push, NV30_3D_VERTEX_BEGIN_END_QUADS);
343    BEGIN_NV04(push, NV30_3D(VTX_ATTR_3F(8)), 3);
344    PUSH_DATAf(push, src->x0);
345    PUSH_DATAf(push, src->y0);
346    PUSH_DATAf(push, src->z);
347    BEGIN_NV04(push, NV30_3D(VTX_ATTR_2I(0)), 1);
348    PUSH_DATA (push, (dst->y0 << 16) | dst->x0);
349    BEGIN_NV04(push, NV30_3D(VTX_ATTR_3F(8)), 3);
350    PUSH_DATAf(push, src->x1);
351    PUSH_DATAf(push, src->y0);
352    PUSH_DATAf(push, src->z);
353    BEGIN_NV04(push, NV30_3D(VTX_ATTR_2I(0)), 1);
354    PUSH_DATA (push, (dst->y0 << 16) | dst->x1);
355    BEGIN_NV04(push, NV30_3D(VTX_ATTR_3F(8)), 3);
356    PUSH_DATAf(push, src->x1);
357    PUSH_DATAf(push, src->y1);
358    PUSH_DATAf(push, src->z);
359    BEGIN_NV04(push, NV30_3D(VTX_ATTR_2I(0)), 1);
360    PUSH_DATA (push, (dst->y1 << 16) | dst->x1);
361    BEGIN_NV04(push, NV30_3D(VTX_ATTR_3F(8)), 3);
362    PUSH_DATAf(push, src->x0);
363    PUSH_DATAf(push, src->y1);
364    PUSH_DATAf(push, src->z);
365    BEGIN_NV04(push, NV30_3D(VTX_ATTR_2I(0)), 1);
366    PUSH_DATA (push, (dst->y1 << 16) | dst->x0);
367    BEGIN_NV04(push, NV30_3D(VERTEX_BEGIN_END), 1);
368    PUSH_DATA (push, NV30_3D_VERTEX_BEGIN_END_STOP);
369 }
370 
371 static boolean
nv30_transfer_sifm(XFER_ARGS)372 nv30_transfer_sifm(XFER_ARGS)
373 {
374    if (!src->pitch || (src->w | src->h) > 1024 || src->w < 2 || src->h < 2)
375       return FALSE;
376 
377    if (src->d > 1 || dst->d > 1)
378       return FALSE;
379 
380    if (dst->offset & 63)
381       return FALSE;
382 
383    if (!dst->pitch) {
384       if ((dst->w | dst->h) > 2048 || dst->w < 2 || dst->h < 2)
385          return FALSE;
386    } else {
387       if (dst->domain != NOUVEAU_BO_VRAM)
388          return FALSE;
389       if (dst->pitch & 63)
390          return FALSE;
391    }
392 
393    return TRUE;
394 }
395 
396 static void
nv30_transfer_rect_sifm(XFER_ARGS)397 nv30_transfer_rect_sifm(XFER_ARGS)
398 
399 {
400    struct nouveau_pushbuf *push = nv30->base.pushbuf;
401    struct nouveau_pushbuf_refn refs[] = {
402       { src->bo, src->domain | NOUVEAU_BO_RD },
403       { dst->bo, dst->domain | NOUVEAU_BO_WR },
404    };
405    struct nv04_fifo *fifo = push->channel->data;
406    unsigned si_fmt, si_arg;
407    unsigned ss_fmt;
408 
409    switch (dst->cpp) {
410    case 4: ss_fmt = NV04_SURFACE_SWZ_FORMAT_COLOR_A8R8G8B8; break;
411    case 2: ss_fmt = NV04_SURFACE_SWZ_FORMAT_COLOR_R5G6B5; break;
412    default:
413       ss_fmt = NV04_SURFACE_SWZ_FORMAT_COLOR_Y8;
414       break;
415    }
416 
417    switch (src->cpp) {
418    case 4: si_fmt = NV03_SIFM_COLOR_FORMAT_A8R8G8B8; break;
419    case 2: si_fmt = NV03_SIFM_COLOR_FORMAT_R5G6B5; break;
420    default:
421       si_fmt = NV03_SIFM_COLOR_FORMAT_AY8;
422       break;
423    }
424 
425    if (filter == NEAREST) {
426       si_arg  = NV03_SIFM_FORMAT_ORIGIN_CENTER;
427       si_arg |= NV03_SIFM_FORMAT_FILTER_POINT_SAMPLE;
428    } else {
429       si_arg  = NV03_SIFM_FORMAT_ORIGIN_CORNER;
430       si_arg |= NV03_SIFM_FORMAT_FILTER_BILINEAR;
431    }
432 
433    if (nouveau_pushbuf_space(push, 32, 6, 0) ||
434        nouveau_pushbuf_refn (push, refs, 2))
435       return;
436 
437    if (dst->pitch) {
438       BEGIN_NV04(push, NV04_SF2D(DMA_IMAGE_SOURCE), 2);
439       PUSH_RELOC(push, dst->bo, 0, NOUVEAU_BO_OR, fifo->vram, fifo->gart);
440       PUSH_RELOC(push, dst->bo, 0, NOUVEAU_BO_OR, fifo->vram, fifo->gart);
441       BEGIN_NV04(push, NV04_SF2D(FORMAT), 4);
442       PUSH_DATA (push, ss_fmt);
443       PUSH_DATA (push, dst->pitch << 16 | dst->pitch);
444       PUSH_RELOC(push, dst->bo, dst->offset, NOUVEAU_BO_LOW, 0, 0);
445       PUSH_RELOC(push, dst->bo, dst->offset, NOUVEAU_BO_LOW, 0, 0);
446       BEGIN_NV04(push, NV05_SIFM(SURFACE), 1);
447       PUSH_DATA (push, nv30->screen->surf2d->handle);
448    } else {
449       BEGIN_NV04(push, NV04_SSWZ(DMA_IMAGE), 1);
450       PUSH_RELOC(push, dst->bo, 0, NOUVEAU_BO_OR, fifo->vram, fifo->gart);
451       BEGIN_NV04(push, NV04_SSWZ(FORMAT), 2);
452       PUSH_DATA (push, ss_fmt | (util_logbase2(dst->w) << 16) |
453                                 (util_logbase2(dst->h) << 24));
454       PUSH_RELOC(push, dst->bo, dst->offset, NOUVEAU_BO_LOW, 0, 0);
455       BEGIN_NV04(push, NV05_SIFM(SURFACE), 1);
456       PUSH_DATA (push, nv30->screen->swzsurf->handle);
457    }
458 
459    BEGIN_NV04(push, NV03_SIFM(DMA_IMAGE), 1);
460    PUSH_RELOC(push, src->bo, 0, NOUVEAU_BO_OR, fifo->vram, fifo->gart);
461    BEGIN_NV04(push, NV03_SIFM(COLOR_FORMAT), 8);
462    PUSH_DATA (push, si_fmt);
463    PUSH_DATA (push, NV03_SIFM_OPERATION_SRCCOPY);
464    PUSH_DATA (push, (           dst->y0  << 16) |            dst->x0);
465    PUSH_DATA (push, ((dst->y1 - dst->y0) << 16) | (dst->x1 - dst->x0));
466    PUSH_DATA (push, (           dst->y0  << 16) |            dst->x0);
467    PUSH_DATA (push, ((dst->y1 - dst->y0) << 16) | (dst->x1 - dst->x0));
468    PUSH_DATA (push, ((src->x1 - src->x0) << 20) / (dst->x1 - dst->x0));
469    PUSH_DATA (push, ((src->y1 - src->y0) << 20) / (dst->y1 - dst->y0));
470    BEGIN_NV04(push, NV03_SIFM(SIZE), 4);
471    PUSH_DATA (push, align(src->h, 2) << 16 | align(src->w, 2));
472    PUSH_DATA (push, src->pitch | si_arg);
473    PUSH_RELOC(push, src->bo, src->offset, NOUVEAU_BO_LOW, 0, 0);
474    PUSH_DATA (push, (src->y0 << 20) | src->x0 << 4);
475 }
476 
477 /* The NOP+OFFSET_OUT stuff after each M2MF transfer *is* actually required
478  * to prevent some odd things from happening, easily reproducible by
479  * attempting to do conditional rendering that has a M2MF transfer done
480  * some time before it.  0x1e98 will fail with a DMA_W_PROTECTION (assuming
481  * that name is still accurate on nv4x) error.
482  */
483 
484 static boolean
nv30_transfer_m2mf(XFER_ARGS)485 nv30_transfer_m2mf(XFER_ARGS)
486 {
487    if (!src->pitch || !dst->pitch)
488       return FALSE;
489    if (nv30_transfer_scaled(src, dst))
490       return FALSE;
491    return TRUE;
492 }
493 
494 static void
nv30_transfer_rect_m2mf(XFER_ARGS)495 nv30_transfer_rect_m2mf(XFER_ARGS)
496 {
497    struct nouveau_pushbuf *push = nv30->base.pushbuf;
498    struct nouveau_pushbuf_refn refs[] = {
499       { src->bo, src->domain | NOUVEAU_BO_RD },
500       { dst->bo, dst->domain | NOUVEAU_BO_WR },
501    };
502    struct nv04_fifo *fifo = push->channel->data;
503    unsigned src_offset = src->offset;
504    unsigned dst_offset = dst->offset;
505    unsigned w = dst->x1 - dst->x0;
506    unsigned h = dst->y1 - dst->y0;
507 
508    src_offset += (src->y0 * src->pitch) + (src->x0 * src->cpp);
509    dst_offset += (dst->y0 * dst->pitch) + (dst->x0 * dst->cpp);
510 
511    BEGIN_NV04(push, NV03_M2MF(DMA_BUFFER_IN), 2);
512    PUSH_DATA (push, (src->domain == NOUVEAU_BO_VRAM) ? fifo->vram : fifo->gart);
513    PUSH_DATA (push, (dst->domain == NOUVEAU_BO_VRAM) ? fifo->vram : fifo->gart);
514 
515    while (h) {
516       unsigned lines = (h > 2047) ? 2047 : h;
517 
518       if (nouveau_pushbuf_space(push, 13, 2, 0) ||
519           nouveau_pushbuf_refn (push, refs, 2))
520          return;
521 
522       BEGIN_NV04(push, NV03_M2MF(OFFSET_IN), 8);
523       PUSH_RELOC(push, src->bo, src_offset, NOUVEAU_BO_LOW, 0, 0);
524       PUSH_RELOC(push, dst->bo, dst_offset, NOUVEAU_BO_LOW, 0, 0);
525       PUSH_DATA (push, src->pitch);
526       PUSH_DATA (push, dst->pitch);
527       PUSH_DATA (push, w * src->cpp);
528       PUSH_DATA (push, lines);
529       PUSH_DATA (push, NV03_M2MF_FORMAT_INPUT_INC_1 |
530                        NV03_M2MF_FORMAT_OUTPUT_INC_1);
531       PUSH_DATA (push, 0x00000000);
532       BEGIN_NV04(push, NV04_GRAPH(M2MF, NOP), 1);
533       PUSH_DATA (push, 0x00000000);
534       BEGIN_NV04(push, NV03_M2MF(OFFSET_OUT), 1);
535       PUSH_DATA (push, 0x00000000);
536 
537       h -= lines;
538       src_offset += src->pitch * lines;
539       dst_offset += dst->pitch * lines;
540    }
541 }
542 
543 static boolean
nv30_transfer_cpu(XFER_ARGS)544 nv30_transfer_cpu(XFER_ARGS)
545 {
546    if (nv30_transfer_scaled(src, dst))
547       return FALSE;
548    return TRUE;
549 }
550 
551 static char *
linear_ptr(struct nv30_rect * rect,char * base,int x,int y,int z)552 linear_ptr(struct nv30_rect *rect, char *base, int x, int y, int z)
553 {
554    return base + (y * rect->pitch) + (x * rect->cpp);
555 }
556 
/* Spread the bits of v apart so that a zero bit sits between each pair of
 * neighbouring input bits (bit n moves to bit 2n), then shift the result
 * left by s.  Only the result of the final mask survives, so input bits
 * that do not fit in the even bit positions are discarded.
 */
static INLINE unsigned
swizzle2d(unsigned v, unsigned s)
{
   static const struct {
      unsigned shift;
      unsigned mask;
   } spread[] = {
      { 8, 0x00ff00ff },
      { 4, 0x0f0f0f0f },
      { 2, 0x33333333 },
      { 1, 0x55555555 },
   };
   unsigned step;

   for (step = 0; step < sizeof(spread) / sizeof(spread[0]); step++)
      v = (v | (v << spread[step].shift)) & spread[step].mask;

   return v << s;
}
566 
567 static char *
swizzle2d_ptr(struct nv30_rect * rect,char * base,int x,int y,int z)568 swizzle2d_ptr(struct nv30_rect *rect, char *base, int x, int y, int z)
569 {
570    unsigned k = util_logbase2(MIN2(rect->w, rect->h));
571    unsigned km = (1 << k) - 1;
572    unsigned nx = rect->w >> k;
573    unsigned tx = x >> k;
574    unsigned ty = y >> k;
575    unsigned m;
576 
577    m  = swizzle2d(x & km, 0);
578    m |= swizzle2d(y & km, 1);
579    m += ((ty * nx) + tx) << k << k;
580 
581    return base + (m * rect->cpp);
582 }
583 
584 static char *
swizzle3d_ptr(struct nv30_rect * rect,char * base,int x,int y,int z)585 swizzle3d_ptr(struct nv30_rect *rect, char *base, int x, int y, int z)
586 {
587    unsigned w = rect->w >> 1;
588    unsigned h = rect->h >> 1;
589    unsigned d = rect->d >> 1;
590    unsigned i = 0, o;
591    unsigned v = 0;
592 
593    do {
594       o = i;
595       if (w) {
596          v |= (x & 1) << i++;
597          x >>= 1;
598          w >>= 1;
599       }
600       if (h) {
601          v |= (y & 1) << i++;
602          y >>= 1;
603          h >>= 1;
604       }
605       if (d) {
606          v |= (z & 1) << i++;
607          z >>= 1;
608          d >>= 1;
609       }
610    } while(o != i);
611 
612    return base + (v * rect->cpp);
613 }
614 
615 typedef char *(*get_ptr_t)(struct nv30_rect *, char *, int, int, int);
616 
617 static INLINE get_ptr_t
get_ptr(struct nv30_rect * rect)618 get_ptr(struct nv30_rect *rect)
619 {
620    if (rect->pitch)
621       return linear_ptr;
622 
623    if (rect->d <= 1)
624       return swizzle2d_ptr;
625 
626    return swizzle3d_ptr;
627 }
628 
629 static void
nv30_transfer_rect_cpu(XFER_ARGS)630 nv30_transfer_rect_cpu(XFER_ARGS)
631 {
632    get_ptr_t sp = get_ptr(src);
633    get_ptr_t dp = get_ptr(dst);
634    char *srcmap, *dstmap;
635    int x, y;
636 
637    nouveau_bo_map(src->bo, NOUVEAU_BO_RD, nv30->base.client);
638    nouveau_bo_map(dst->bo, NOUVEAU_BO_WR, nv30->base.client);
639    srcmap = src->bo->map + src->offset;
640    dstmap = dst->bo->map + dst->offset;
641 
642    for (y = 0; y < (dst->y1 - dst->y0); y++) {
643       for (x = 0; x < (dst->x1 - dst->x0); x++) {
644          memcpy(dp(dst, dstmap, dst->x0 + x, dst->y0 + y, dst->z),
645                 sp(src, srcmap, src->x0 + x, src->y0 + y, src->z), dst->cpp);
646       }
647    }
648 }
649 
650 void
nv30_transfer_rect(struct nv30_context * nv30,enum nv30_transfer_filter filter,struct nv30_rect * src,struct nv30_rect * dst)651 nv30_transfer_rect(struct nv30_context *nv30, enum nv30_transfer_filter filter,
652                    struct nv30_rect *src, struct nv30_rect *dst)
653 {
654    static const struct {
655       char *name;
656       boolean (*possible)(XFER_ARGS);
657       void (*execute)(XFER_ARGS);
658    } *method, methods[] = {
659       { "m2mf", nv30_transfer_m2mf, nv30_transfer_rect_m2mf },
660       { "sifm", nv30_transfer_sifm, nv30_transfer_rect_sifm },
661       { "blit", nv30_transfer_blit, nv30_transfer_rect_blit },
662       { "rect", nv30_transfer_cpu, nv30_transfer_rect_cpu },
663       {}
664    };
665 
666    method = methods - 1;
667    while ((++method)->possible) {
668       if (method->possible(nv30, filter, src, dst)) {
669          method->execute(nv30, filter, src, dst);
670          return;
671       }
672    }
673 
674    assert(0);
675 }
676 
/* Placeholder for uploading inline data into a buffer object.  Not
 * implemented: none of the arguments are used and a diagnostic is written
 * to stderr.
 */
void
nv30_transfer_push_data(struct nouveau_context *nv,
                        struct nouveau_bo *bo, unsigned offset, unsigned domain,
                        unsigned size, void *data)
{
   /* open question from the original author: use ifc, or scratch +
    * copy_data?
    */
   fputs("nv30: push_data not implemented\n", stderr);
}
685 
686 void
nv30_transfer_copy_data(struct nouveau_context * nv,struct nouveau_bo * dst,unsigned d_off,unsigned d_dom,struct nouveau_bo * src,unsigned s_off,unsigned s_dom,unsigned size)687 nv30_transfer_copy_data(struct nouveau_context *nv,
688                         struct nouveau_bo *dst, unsigned d_off, unsigned d_dom,
689                         struct nouveau_bo *src, unsigned s_off, unsigned s_dom,
690                         unsigned size)
691 {
692    struct nv04_fifo *fifo = nv->screen->channel->data;
693    struct nouveau_pushbuf_refn refs[] = {
694       { src, s_dom | NOUVEAU_BO_RD },
695       { dst, d_dom | NOUVEAU_BO_WR },
696    };
697    struct nouveau_pushbuf *push = nv->pushbuf;
698    unsigned pages, lines;
699 
700    pages = size >> 12;
701    size -= (pages << 12);
702 
703    BEGIN_NV04(push, NV03_M2MF(DMA_BUFFER_IN), 2);
704    PUSH_DATA (push, (s_dom == NOUVEAU_BO_VRAM) ? fifo->vram : fifo->gart);
705    PUSH_DATA (push, (d_dom == NOUVEAU_BO_VRAM) ? fifo->vram : fifo->gart);
706 
707    while (pages) {
708       lines  = (pages > 2047) ? 2047 : pages;
709       pages -= lines;
710 
711       if (nouveau_pushbuf_space(push, 13, 2, 0) ||
712           nouveau_pushbuf_refn (push, refs, 2))
713          return;
714 
715       BEGIN_NV04(push, NV03_M2MF(OFFSET_IN), 8);
716       PUSH_RELOC(push, src, s_off, NOUVEAU_BO_LOW, 0, 0);
717       PUSH_RELOC(push, dst, d_off, NOUVEAU_BO_LOW, 0, 0);
718       PUSH_DATA (push, 4096);
719       PUSH_DATA (push, 4096);
720       PUSH_DATA (push, 4096);
721       PUSH_DATA (push, lines);
722       PUSH_DATA (push, NV03_M2MF_FORMAT_INPUT_INC_1 |
723                        NV03_M2MF_FORMAT_OUTPUT_INC_1);
724       PUSH_DATA (push, 0x00000000);
725       BEGIN_NV04(push, NV04_GRAPH(M2MF, NOP), 1);
726       PUSH_DATA (push, 0x00000000);
727       BEGIN_NV04(push, NV03_M2MF(OFFSET_OUT), 1);
728       PUSH_DATA (push, 0x00000000);
729 
730       s_off += (lines << 12);
731       d_off += (lines << 12);
732    }
733 
734    if (size) {
735       if (nouveau_pushbuf_space(push, 13, 2, 0) ||
736           nouveau_pushbuf_refn (push, refs, 2))
737          return;
738 
739       BEGIN_NV04(push, NV03_M2MF(OFFSET_IN), 8);
740       PUSH_RELOC(push, src, s_off, NOUVEAU_BO_LOW, 0, 0);
741       PUSH_RELOC(push, dst, d_off, NOUVEAU_BO_LOW, 0, 0);
742       PUSH_DATA (push, size);
743       PUSH_DATA (push, size);
744       PUSH_DATA (push, size);
745       PUSH_DATA (push, 1);
746       PUSH_DATA (push, NV03_M2MF_FORMAT_INPUT_INC_1 |
747                        NV03_M2MF_FORMAT_OUTPUT_INC_1);
748       PUSH_DATA (push, 0x00000000);
749       BEGIN_NV04(push, NV04_GRAPH(M2MF, NOP), 1);
750       PUSH_DATA (push, 0x00000000);
751       BEGIN_NV04(push, NV03_M2MF(OFFSET_OUT), 1);
752       PUSH_DATA (push, 0x00000000);
753    }
754 }
755