/*
 * Mesa 3-D graphics library
 *
 * Copyright (C) 1999-2001  Brian Paul   All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */
25
/* Everything up to the matching #endif is assembled only when the build
 * defines USE_3DNOW_ASM.
 */
#ifdef USE_3DNOW_ASM
#include "assyntax.h"
#include "matypes.h"
#include "xform_args.h"

    SEG_TEXT

/* Every routine in this file reads its ARG_* operands while exactly one
 * register (ESI, 4 bytes) has been pushed on the stack.
 * NOTE(review): presumably FRAME_OFFSET is what the ARG_* macros from
 * xform_args.h use to skip those 4 bytes -- confirm against that header.
 */
#define FRAME_OFFSET	4
34
35
/*
 * _mesa_3dnow_transform_points4_general
 *
 * Multiplies every 4-component float vertex of ARG_SOURCE by the full 4x4
 * matrix ARG_MATRIX (16 floats, mK lives at byte offset 4*K) and stores the
 * result in ARG_DEST.  Per vertex (x0..x3 in, r0..r3 out):
 *
 *     r0 = x0*m0 + x1*m4 + x2*m8  + x3*m12
 *     r1 = x0*m1 + x1*m5 + x2*m9  + x3*m13
 *     r2 = x0*m2 + x1*m6 + x2*m10 + x3*m14
 *     r3 = x0*m3 + x1*m7 + x2*m11 + x3*m15
 *
 * Before the loop the destination header is updated: V4F_SIZE = 4,
 * V4F_FLAGS |= VEC_SIZE_4, V4F_COUNT = source V4F_COUNT.  Input vertices
 * are read V4F_STRIDE bytes apart; output vertices are written 16 bytes
 * apart starting at the destination's V4F_START.
 *
 * Loop registers:
 *     EAX = current source vertex      EDX = current destination vertex
 *     ECX = matrix                     ESI = vertices left to process
 *     EDI = source stride in bytes
 * ESI and EDI are saved and restored; EAX, ECX, EDX, the flags and
 * MM0-MM7 are clobbered.
 *
 * The ARG_* operands must be read while only ESI has been pushed.
 * NOTE(review): presumably FRAME_OFFSET (4) tells the ARG_* macros in
 * xform_args.h about that single push -- confirm there.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_3dnow_transform_points4_general )
HIDDEN(_mesa_3dnow_transform_points4_general)
GLNAME( _mesa_3dnow_transform_points4_general ):

    PUSH_L    ( ESI )

    MOV_L     ( ARG_DEST, ECX )
    MOV_L     ( ARG_MATRIX, ESI )
    MOV_L     ( ARG_SOURCE, EAX )
    MOV_L     ( CONST(4), REGOFF(V4F_SIZE, ECX) )		/* dest size = 4       */
    OR_B      ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) )	/* dest flags |= SIZE_4 */
    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )
    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )			/* dest count = src count */

    PUSH_L    ( EDI )			/* ARG_* must not be used past here  */

    MOV_L     ( REGOFF(V4F_START, ECX), EDX )	/* EDX = first output vertex */
    MOV_L     ( ESI, ECX )			/* ECX = matrix              */
    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )	/* ESI = vertex count        */
    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )	/* EDI = source stride       */
    MOV_L     ( REGOFF(V4F_START, EAX), EAX )	/* EAX = first input vertex  */

    TEST_L    ( ESI, ESI )
    JZ        ( LLBL( G3TPGR_2 ) )	/* nothing to do for count == 0      */

    PREFETCHW ( REGIND(EDX) )

ALIGNTEXT16
LLBL( G3TPGR_1 ):

    PREFETCHW ( REGOFF(32, EDX) )	/* prefetch 2 vertices ahead         */

    MOVQ      ( REGIND(EAX), MM0 )	/* x1              | x0              */
    MOVQ      ( REGOFF(8, EAX), MM4 )	/* x3              | x2              */

    ADD_L     ( EDI, EAX )		/* next vertex                       */
    PREFETCH  ( REGIND(EAX) )

    MOVQ      ( MM0, MM2 )		/* x1              | x0              */
    MOVQ      ( MM4, MM6 )		/* x3              | x2              */

    PUNPCKLDQ ( MM0, MM0 )		/* x0              | x0              */
    PUNPCKHDQ ( MM2, MM2 )		/* x1              | x1              */

    MOVQ      ( MM0, MM1 )		/* x0              | x0              */
    ADD_L     ( CONST(16), EDX )	/* next r (stores below use -16/-8)  */

    PFMUL     ( REGIND(ECX), MM0 )	/* x0*m1           | x0*m0           */
    MOVQ      ( MM2, MM3 )		/* x1              | x1              */

    PFMUL     ( REGOFF(8, ECX), MM1 )	/* x0*m3           | x0*m2           */
    PUNPCKLDQ ( MM4, MM4 )		/* x2              | x2              */

    PFMUL     ( REGOFF(16, ECX), MM2 )	/* x1*m5           | x1*m4           */
    MOVQ      ( MM4, MM5 )		/* x2              | x2              */

    PFMUL     ( REGOFF(24, ECX), MM3 )	/* x1*m7           | x1*m6           */
    PUNPCKHDQ ( MM6, MM6 )		/* x3              | x3              */

    PFMUL     ( REGOFF(32, ECX), MM4 )	/* x2*m9           | x2*m8           */
    MOVQ      ( MM6, MM7 )		/* x3              | x3              */

    PFMUL     ( REGOFF(40, ECX), MM5 )	/* x2*m11          | x2*m10          */
    PFADD     ( MM0, MM2 )		/* x0*m1+x1*m5     | x0*m0+x1*m4     */

    PFMUL     ( REGOFF(48, ECX), MM6 )	/* x3*m13          | x3*m12          */
    PFADD     ( MM1, MM3 )		/* x0*m3+x1*m7     | x0*m2+x1*m6     */

    PFMUL     ( REGOFF(56, ECX), MM7 )	/* x3*m15          | x3*m14          */
    PFADD     ( MM4, MM6 )		/* x2*m9+x3*m13    | x2*m8+x3*m12    */

    PFADD     ( MM5, MM7 )		/* x2*m11+x3*m15   | x2*m10+x3*m14   */
    PFADD     ( MM2, MM6 )		/* r1              | r0              */

    PFADD     ( MM3, MM7 )		/* r3              | r2              */
    MOVQ      ( MM6, REGOFF(-16, EDX) )	/* write r0, r1                      */

    MOVQ      ( MM7, REGOFF(-8, EDX) )	/* write r2, r3                      */

    DEC_L     ( ESI )			/* decrement vertex counter          */
    JNZ       ( LLBL( G3TPGR_1 ) )	/* cnt > 0 ? -> process next vertex  */

LLBL( G3TPGR_2 ):

    FEMMS				/* leave MMX/3DNow! state            */
    POP_L     ( EDI )
    POP_L     ( ESI )
    RET
125
126
127
128
/*
 * _mesa_3dnow_transform_points4_perspective
 *
 * Transforms every 4-component float vertex of ARG_SOURCE using only the
 * matrix elements this routine reads from ARG_MATRIX (mRC is the float at
 * byte offset 4*(4*R+C)) and stores the result in ARG_DEST.  Per vertex
 * (x0..x3 in, r0..r3 out):
 *
 *     r0 = x0*m00 + x2*m20
 *     r1 = x1*m11 + x2*m21
 *     r2 = x2*m22 + x3*m32
 *     r3 = -x2
 *
 * Before the loop the destination header is updated: V4F_SIZE = 4,
 * V4F_FLAGS |= VEC_SIZE_4, V4F_COUNT = source V4F_COUNT.  Input vertices
 * are read V4F_STRIDE bytes apart; output vertices are written 16 bytes
 * apart starting at the destination's V4F_START.
 *
 * Loop registers:
 *     EAX = current source vertex      EDX = current destination vertex
 *     ESI = vertices left to process   EDI = source stride in bytes
 *     MM0 = m11 | m00    MM1 = m32 | m22    MM2 = m21 | m20    MM7 = 0 | 0
 * ESI and EDI are saved and restored; EAX, ECX, EDX, the flags and
 * MM0-MM7 are clobbered.
 *
 * The ARG_* operands must be read while only ESI has been pushed.
 * NOTE(review): presumably FRAME_OFFSET (4) tells the ARG_* macros in
 * xform_args.h about that single push -- confirm there.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_3dnow_transform_points4_perspective )
HIDDEN(_mesa_3dnow_transform_points4_perspective)
GLNAME( _mesa_3dnow_transform_points4_perspective ):

    PUSH_L    ( ESI )

    MOV_L     ( ARG_DEST, ECX )
    MOV_L     ( ARG_MATRIX, ESI )
    MOV_L     ( ARG_SOURCE, EAX )
    MOV_L     ( CONST(4), REGOFF(V4F_SIZE, ECX) )		/* dest size = 4       */
    OR_B      ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) )	/* dest flags |= SIZE_4 */
    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )
    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )			/* dest count = src count */

    PUSH_L    ( EDI )			/* ARG_* must not be used past here  */

    MOV_L     ( REGOFF(V4F_START, ECX), EDX )	/* EDX = first output vertex */
    MOV_L     ( ESI, ECX )			/* ECX = matrix              */
    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )	/* ESI = vertex count        */
    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )	/* EDI = source stride       */
    MOV_L     ( REGOFF(V4F_START, EAX), EAX )	/* EAX = first input vertex  */

    TEST_L    ( ESI, ESI )
    JZ        ( LLBL( G3TPPR_2 ) )	/* nothing to do for count == 0      */

    PREFETCH  ( REGIND(EAX) )
    PREFETCHW ( REGIND(EDX) )

    /* Loop-invariant matrix elements are kept in MM0-MM2 for the whole loop. */
    MOVD      ( REGIND(ECX), MM0 )	/*                 | m00             */
    PUNPCKLDQ ( REGOFF(20, ECX), MM0 )	/* m11             | m00             */

    MOVD      ( REGOFF(40, ECX), MM1 )	/*                 | m22             */
    PUNPCKLDQ ( REGOFF(56, ECX), MM1 )	/* m32             | m22             */

    MOVQ      ( REGOFF(32, ECX), MM2 )	/* m21             | m20             */
    PXOR      ( MM7, MM7 )		/* 0               | 0               */

ALIGNTEXT16
LLBL( G3TPPR_1 ):

    PREFETCHW ( REGOFF(32, EDX) )	/* prefetch 2 vertices ahead         */

    MOVQ      ( REGIND(EAX), MM4 )	/* x1              | x0              */
    MOVQ      ( REGOFF(8, EAX), MM5 )	/* x3              | x2              */
    MOVD      ( REGOFF(8, EAX), MM3 )	/* 0               | x2              */

    ADD_L     ( EDI, EAX )		/* next vertex                       */
    PREFETCH  ( REGOFF(32, EAX) )	/* hopefully array is tightly packed */

    MOVQ      ( MM5, MM6 )		/* x3              | x2              */
    PFMUL     ( MM0, MM4 )		/* x1*m11          | x0*m00          */

    PUNPCKLDQ ( MM5, MM5 )		/* x2              | x2              */
    ADD_L     ( CONST(16), EDX )	/* next r (stores below use -16/-8)  */

    PFMUL     ( MM2, MM5 )		/* x2*m21          | x2*m20          */
    PFSUBR    ( MM7, MM3 )		/* 0 - 0           | 0 - x2 = -x2    */

    PFMUL     ( MM1, MM6 )		/* x3*m32          | x2*m22          */
    PFADD     ( MM4, MM5 )		/* x1*m11+x2*m21   | x0*m00+x2*m20   */

    PFACC     ( MM3, MM6 )		/* -x2             | x2*m22+x3*m32   */
    MOVQ      ( MM5, REGOFF(-16, EDX) )	/* write r0, r1                      */

    MOVQ      ( MM6, REGOFF(-8, EDX) )	/* write r2, r3                      */
    DEC_L     ( ESI )			/* decrement vertex counter          */

    JNZ       ( LLBL( G3TPPR_1 ) )	/* cnt > 0 ? -> process next vertex  */

LLBL( G3TPPR_2 ):

    FEMMS				/* leave MMX/3DNow! state            */
    POP_L     ( EDI )
    POP_L     ( ESI )
    RET
205
206
207
208
/*
 * _mesa_3dnow_transform_points4_3d
 *
 * Transforms every 4-component float vertex of ARG_SOURCE by the first
 * three columns of ARG_MATRIX (16 floats, mK lives at byte offset 4*K) and
 * copies the fourth component through.  Per vertex (x0..x3 in, r0..r3 out):
 *
 *     r0 = x0*m0 + x1*m4 + x2*m8  + x3*m12
 *     r1 = x0*m1 + x1*m5 + x2*m9  + x3*m13
 *     r2 = x0*m2 + x1*m6 + x2*m10 + x3*m14
 *     r3 = x3
 *
 * Before the loop the destination header is updated: V4F_SIZE = 4,
 * V4F_FLAGS |= VEC_SIZE_4, V4F_COUNT = source V4F_COUNT.  Input vertices
 * are read V4F_STRIDE bytes apart; output vertices are written 16 bytes
 * apart starting at the destination's V4F_START.
 *
 * Loop registers:
 *     EAX = current source vertex      EDX = current destination vertex
 *     ECX = matrix                     ESI = vertices left to process
 *     EDI = source stride in bytes
 *     MM6 = m6 | m2                    MM7 = m14 | m10
 * ESI and EDI are saved and restored; EAX, ECX, EDX, the flags and
 * MM0-MM7 are clobbered.
 *
 * The ARG_* operands must be read while only ESI has been pushed.
 * NOTE(review): presumably FRAME_OFFSET (4) tells the ARG_* macros in
 * xform_args.h about that single push -- confirm there.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_3dnow_transform_points4_3d )
HIDDEN(_mesa_3dnow_transform_points4_3d)
GLNAME( _mesa_3dnow_transform_points4_3d ):

    PUSH_L    ( ESI )

    MOV_L     ( ARG_DEST, ECX )
    MOV_L     ( ARG_MATRIX, ESI )
    MOV_L     ( ARG_SOURCE, EAX )
    MOV_L     ( CONST(4), REGOFF(V4F_SIZE, ECX) )		/* dest size = 4       */
    OR_B      ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) )	/* dest flags |= SIZE_4 */
    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )
    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )			/* dest count = src count */

    PUSH_L    ( EDI )			/* ARG_* must not be used past here  */

    MOV_L     ( REGOFF(V4F_START, ECX), EDX )	/* EDX = first output vertex */
    MOV_L     ( ESI, ECX )			/* ECX = matrix              */
    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )	/* ESI = vertex count        */
    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )	/* EDI = source stride       */
    MOV_L     ( REGOFF(V4F_START, EAX), EAX )	/* EAX = first input vertex  */

    TEST_L    ( ESI, ESI )
    JZ        ( LLBL( G3TP3R_2 ) )	/* nothing to do for count == 0      */

    /* The third-column factors stay in MM6/MM7 for the whole loop. */
    MOVD      ( REGOFF(8, ECX), MM6 )	/*                 | m2              */
    PUNPCKLDQ ( REGOFF(24, ECX), MM6 )	/* m6              | m2              */

    MOVD      ( REGOFF(40, ECX), MM7 )	/*                 | m10             */
    PUNPCKLDQ ( REGOFF(56, ECX), MM7 )	/* m14             | m10             */

ALIGNTEXT16
LLBL( G3TP3R_1 ):

    PREFETCHW ( REGOFF(32, EDX) )	/* prefetch 2 vertices ahead         */
    PREFETCH  ( REGOFF(32, EAX) )	/* hopefully array is tightly packed */

    MOVQ      ( REGIND(EAX), MM2 )	/* x1              | x0              */
    MOVQ      ( REGOFF(8, EAX), MM3 )	/* x3              | x2              */

    MOVQ      ( MM2, MM0 )		/* x1              | x0              */
    MOVQ      ( MM3, MM4 )		/* x3              | x2              */

    MOVQ      ( MM0, MM1 )		/* x1              | x0              */
    MOVQ      ( MM4, MM5 )		/* x3              | x2              */

    PUNPCKLDQ ( MM0, MM0 )		/* x0              | x0              */
    PUNPCKHDQ ( MM1, MM1 )		/* x1              | x1              */

    PFMUL     ( REGIND(ECX), MM0 )	/* x0*m1           | x0*m0           */
    PUNPCKLDQ ( MM3, MM3 )		/* x2              | x2              */

    PFMUL     ( REGOFF(16, ECX), MM1 )	/* x1*m5           | x1*m4           */
    PUNPCKHDQ ( MM4, MM4 )		/* x3              | x3              */

    PFMUL     ( MM6, MM2 )		/* x1*m6           | x0*m2           */
    PFADD     ( MM0, MM1 )		/* x0*m1+x1*m5     | x0*m0+x1*m4     */

    PFMUL     ( REGOFF(32, ECX), MM3 )	/* x2*m9           | x2*m8           */
    ADD_L     ( CONST(16), EDX )	/* next r (stores below use -16/-8)  */

    PFMUL     ( REGOFF(48, ECX), MM4 )	/* x3*m13          | x3*m12          */
    PFADD     ( MM1, MM3 )		/* x0*m1+..+x2*m9  | x0*m0+...+x2*m8 */

    PFMUL     ( MM7, MM5 )		/* x3*m14          | x2*m10          */
    PFADD     ( MM3, MM4 )		/* r1              | r0              */

    PFACC     ( MM2, MM5 )		/* x0*m2+x1*m6     | x2*m10+x3*m14   */
    MOVD      ( REGOFF(12, EAX), MM0 )	/* 0 | x3 (read before EAX advances) */

    ADD_L     ( EDI, EAX )		/* next vertex                       */
    PFACC     ( MM0, MM5 )		/* r3 (= x3 + 0)   | r2              */

    MOVQ      ( MM4, REGOFF(-16, EDX) )	/* write r0, r1                      */
    MOVQ      ( MM5, REGOFF(-8, EDX) )	/* write r2, r3                      */

    DEC_L     ( ESI )			/* decrement vertex counter          */
    JNZ       ( LLBL( G3TP3R_1 ) )	/* cnt > 0 ? -> process next vertex  */

LLBL( G3TP3R_2 ):

    FEMMS				/* leave MMX/3DNow! state            */
    POP_L     ( EDI )
    POP_L     ( ESI )
    RET
295
296
297
298
/*
 * _mesa_3dnow_transform_points4_3d_no_rot
 *
 * Transforms every 4-component float vertex of ARG_SOURCE using only the
 * matrix elements this routine reads from ARG_MATRIX (mRC is the float at
 * byte offset 4*(4*R+C)) and copies the fourth component through.
 * Per vertex (x0..x3 in, r0..r3 out):
 *
 *     r0 = x0*m00 + x3*m30
 *     r1 = x1*m11 + x3*m31
 *     r2 = x2*m22 + x3*m32
 *     r3 = x3
 *
 * Before the loop the destination header is updated: V4F_SIZE = 4,
 * V4F_FLAGS |= VEC_SIZE_4, V4F_COUNT = source V4F_COUNT.  Input vertices
 * are read V4F_STRIDE bytes apart; output vertices are written 16 bytes
 * apart starting at the destination's V4F_START.
 *
 * Loop registers:
 *     EAX = current source vertex      EDX = current destination vertex
 *     ESI = vertices left to process   EDI = source stride in bytes
 *     MM0 = m11 | m00    MM1 = m31 | m30    MM2 = m32 | m22
 * ESI and EDI are saved and restored; EAX, ECX, EDX, the flags and
 * MM0-MM2, MM4-MM7 are clobbered.
 *
 * The ARG_* operands must be read while only ESI has been pushed.
 * NOTE(review): presumably FRAME_OFFSET (4) tells the ARG_* macros in
 * xform_args.h about that single push -- confirm there.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_3dnow_transform_points4_3d_no_rot )
HIDDEN(_mesa_3dnow_transform_points4_3d_no_rot)
GLNAME( _mesa_3dnow_transform_points4_3d_no_rot ):

    PUSH_L    ( ESI )
    MOV_L     ( ARG_DEST, ECX )
    MOV_L     ( ARG_MATRIX, ESI )
    MOV_L     ( ARG_SOURCE, EAX )
    MOV_L     ( CONST(4), REGOFF(V4F_SIZE, ECX) )		/* dest size = 4       */
    OR_B      ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) )	/* dest flags |= SIZE_4 */
    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )
    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )			/* dest count = src count */

    PUSH_L    ( EDI )			/* ARG_* must not be used past here  */

    MOV_L     ( REGOFF(V4F_START, ECX), EDX )	/* EDX = first output vertex */
    MOV_L     ( ESI, ECX )			/* ECX = matrix              */
    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )	/* ESI = vertex count        */
    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )	/* EDI = source stride       */
    MOV_L     ( REGOFF(V4F_START, EAX), EAX )	/* EAX = first input vertex  */

    TEST_L    ( ESI, ESI )
    JZ        ( LLBL( G3TP3NRR_2 ) )	/* nothing to do for count == 0      */

    /* Loop-invariant matrix elements are kept in MM0-MM2 for the whole loop. */
    MOVD      ( REGIND(ECX), MM0 )	/*                 | m00             */
    PUNPCKLDQ ( REGOFF(20, ECX), MM0 )	/* m11             | m00             */

    MOVD      ( REGOFF(40, ECX), MM2 )	/*                 | m22             */
    PUNPCKLDQ ( REGOFF(56, ECX), MM2 )	/* m32             | m22             */

    MOVQ      ( REGOFF(48, ECX), MM1 )	/* m31             | m30             */

ALIGNTEXT16
LLBL( G3TP3NRR_1 ):

    PREFETCHW ( REGOFF(32, EDX) )	/* prefetch 2 vertices ahead         */

    MOVQ      ( REGIND(EAX), MM4 )	/* x1              | x0              */
    MOVQ      ( REGOFF(8, EAX), MM5 )	/* x3              | x2              */
    MOVD      ( REGOFF(12, EAX), MM7 )	/* 0               | x3              */

    ADD_L     ( EDI, EAX )		/* next vertex                       */
    PREFETCH  ( REGOFF(32, EAX) )	/* hopefully array is tightly packed */

    MOVQ      ( MM5, MM6 )		/* x3              | x2              */
    PFMUL     ( MM0, MM4 )		/* x1*m11          | x0*m00          */

    PUNPCKHDQ ( MM6, MM6 )		/* x3              | x3              */
    PFMUL     ( MM2, MM5 )		/* x3*m32          | x2*m22          */

    PFMUL     ( MM1, MM6 )		/* x3*m31          | x3*m30          */
    PFACC     ( MM7, MM5 )		/* x3 (= x3 + 0)   | x2*m22+x3*m32   */

    PFADD     ( MM6, MM4 )		/* x1*m11+x3*m31   | x0*m00+x3*m30   */
    ADD_L     ( CONST(16), EDX )	/* next r (stores below use -16/-8)  */

    MOVQ      ( MM4, REGOFF(-16, EDX) )	/* write r0, r1                      */
    MOVQ      ( MM5, REGOFF(-8, EDX) )	/* write r2, r3                      */

    DEC_L     ( ESI )			/* decrement vertex counter          */
    JNZ       ( LLBL( G3TP3NRR_1 ) )	/* cnt > 0 ? -> process next vertex  */

LLBL( G3TP3NRR_2 ):

    FEMMS				/* leave MMX/3DNow! state            */
    POP_L     ( EDI )
    POP_L     ( ESI )
    RET
368
369
370
371
/*
 * _mesa_3dnow_transform_points4_2d
 *
 * Transforms the first two components of every 4-component float vertex of
 * ARG_SOURCE using only the matrix elements this routine reads from
 * ARG_MATRIX (mRC is the float at byte offset 4*(4*R+C)); the last two
 * components are copied through.  Per vertex (x0..x3 in, r0..r3 out):
 *
 *     r0 = x0*m00 + x1*m10 + x3*m30
 *     r1 = x0*m01 + x1*m11 + x3*m31
 *     r2 = x2
 *     r3 = x3
 *
 * Before the loop the destination header is updated: V4F_SIZE = 4,
 * V4F_FLAGS |= VEC_SIZE_4, V4F_COUNT = source V4F_COUNT.  Input vertices
 * are read V4F_STRIDE bytes apart; output vertices are written 16 bytes
 * apart starting at the destination's V4F_START.
 *
 * Loop registers:
 *     EAX = current source vertex      EDX = current destination vertex
 *     ESI = vertices left to process   EDI = source stride in bytes
 *     MM0 = m10 | m00    MM1 = m11 | m01    MM2 = m31 | m30
 * ESI and EDI are saved and restored; EAX, ECX, EDX, the flags and
 * MM0-MM6 are clobbered.
 *
 * The ARG_* operands must be read while only ESI has been pushed.
 * NOTE(review): presumably FRAME_OFFSET (4) tells the ARG_* macros in
 * xform_args.h about that single push -- confirm there.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_3dnow_transform_points4_2d )
HIDDEN(_mesa_3dnow_transform_points4_2d)
GLNAME( _mesa_3dnow_transform_points4_2d ):

    PUSH_L    ( ESI )

    MOV_L     ( ARG_DEST, ECX )
    MOV_L     ( ARG_MATRIX, ESI )
    MOV_L     ( ARG_SOURCE, EAX )
    MOV_L     ( CONST(4), REGOFF(V4F_SIZE, ECX) )		/* dest size = 4       */
    OR_B      ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) )	/* dest flags |= SIZE_4 */
    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )
    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )			/* dest count = src count */

    PUSH_L    ( EDI )			/* ARG_* must not be used past here  */

    MOV_L     ( REGOFF(V4F_START, ECX), EDX )	/* EDX = first output vertex */
    MOV_L     ( ESI, ECX )			/* ECX = matrix              */
    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )	/* ESI = vertex count        */
    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )	/* EDI = source stride       */
    MOV_L     ( REGOFF(V4F_START, EAX), EAX )	/* EAX = first input vertex  */

    TEST_L    ( ESI, ESI )
    JZ        ( LLBL( G3TP2R_2 ) )	/* nothing to do for count == 0      */

    /* Loop-invariant matrix elements are kept in MM0-MM2 for the whole loop. */
    MOVD      ( REGIND(ECX), MM0 )	/*                 | m00             */
    PUNPCKLDQ ( REGOFF(16, ECX), MM0 )	/* m10             | m00             */

    MOVD      ( REGOFF(4, ECX), MM1 )	/*                 | m01             */
    PUNPCKLDQ ( REGOFF(20, ECX), MM1 )	/* m11             | m01             */

    MOVQ      ( REGOFF(48, ECX), MM2 )	/* m31             | m30             */

ALIGNTEXT16
LLBL( G3TP2R_1 ):

    PREFETCHW ( REGOFF(32, EDX) )	/* prefetch 2 vertices ahead         */

    MOVQ      ( REGIND(EAX), MM3 )	/* x1              | x0              */
    MOVQ      ( REGOFF(8, EAX), MM5 )	/* x3              | x2              */

    ADD_L     ( EDI, EAX )		/* next vertex                       */
    PREFETCH  ( REGIND(EAX) )

    MOVQ      ( MM3, MM4 )		/* x1              | x0              */
    MOVQ      ( MM5, MM6 )		/* x3              | x2              */

    PFMUL     ( MM1, MM4 )		/* x1*m11          | x0*m01          */
    PUNPCKHDQ ( MM6, MM6 )		/* x3              | x3              */

    PFMUL     ( MM0, MM3 )		/* x1*m10          | x0*m00          */
    ADD_L     ( CONST(16), EDX )	/* next r (stores below use -16/-8)  */

    PFACC     ( MM4, MM3 )		/* x0*m01+x1*m11   | x0*m00+x1*m10   */
    PFMUL     ( MM2, MM6 )		/* x3*m31          | x3*m30          */

    PFADD     ( MM6, MM3 )		/* r1              | r0              */
    MOVQ      ( MM5, REGOFF(-8, EDX) )	/* write r2, r3 (x2, x3 unchanged)   */

    MOVQ      ( MM3, REGOFF(-16, EDX) )	/* write r0, r1                      */

    DEC_L     ( ESI )			/* decrement vertex counter          */
    JNZ       ( LLBL( G3TP2R_1 ) )	/* cnt > 0 ? -> process next vertex  */

LLBL( G3TP2R_2 ):

    FEMMS				/* leave MMX/3DNow! state            */
    POP_L     ( EDI )
    POP_L     ( ESI )
    RET
443
444
445
446
/*
 * _mesa_3dnow_transform_points4_2d_no_rot
 *
 * Scales and offsets the first two components of every 4-component float
 * vertex of ARG_SOURCE using only the matrix elements this routine reads
 * from ARG_MATRIX (mRC is the float at byte offset 4*(4*R+C)); the last two
 * components are copied through.  Per vertex (x0..x3 in, r0..r3 out):
 *
 *     r0 = x0*m00 + x3*m30
 *     r1 = x1*m11 + x3*m31
 *     r2 = x2
 *     r3 = x3
 *
 * Before the loop the destination header is updated: V4F_SIZE = 4,
 * V4F_FLAGS |= VEC_SIZE_4, V4F_COUNT = source V4F_COUNT.  Input vertices
 * are read V4F_STRIDE bytes apart; output vertices are written 16 bytes
 * apart starting at the destination's V4F_START.
 *
 * Loop registers:
 *     EAX = current source vertex      EDX = current destination vertex
 *     ESI = vertices left to process   EDI = source stride in bytes
 *     MM0 = m11 | m00                  MM1 = m31 | m30
 * ESI and EDI are saved and restored; EAX, ECX, EDX, the flags and
 * MM0, MM1, MM4-MM6 are clobbered.
 *
 * The ARG_* operands must be read while only ESI has been pushed.
 * NOTE(review): presumably FRAME_OFFSET (4) tells the ARG_* macros in
 * xform_args.h about that single push -- confirm there.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_3dnow_transform_points4_2d_no_rot )
HIDDEN(_mesa_3dnow_transform_points4_2d_no_rot)
GLNAME( _mesa_3dnow_transform_points4_2d_no_rot ):

    PUSH_L    ( ESI )

    MOV_L     ( ARG_DEST, ECX )
    MOV_L     ( ARG_MATRIX, ESI )
    MOV_L     ( ARG_SOURCE, EAX )
    MOV_L     ( CONST(4), REGOFF(V4F_SIZE, ECX) )		/* dest size = 4       */
    OR_B      ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) )	/* dest flags |= SIZE_4 */
    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )
    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )			/* dest count = src count */

    PUSH_L    ( EDI )			/* ARG_* must not be used past here  */

    MOV_L     ( REGOFF(V4F_START, ECX), EDX )	/* EDX = first output vertex */
    MOV_L     ( ESI, ECX )			/* ECX = matrix              */
    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )	/* ESI = vertex count        */
    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )	/* EDI = source stride       */
    MOV_L     ( REGOFF(V4F_START, EAX), EAX )	/* EAX = first input vertex  */

    TEST_L    ( ESI, ESI )
    JZ        ( LLBL( G3TP2NRR_3 ) )	/* nothing to do for count == 0      */

    /* Loop-invariant matrix elements are kept in MM0/MM1 for the whole loop. */
    MOVD      ( REGIND(ECX), MM0 )	/*                 | m00             */
    PUNPCKLDQ ( REGOFF(20, ECX), MM0 )	/* m11             | m00             */

    MOVQ      ( REGOFF(48, ECX), MM1 )	/* m31             | m30             */

ALIGNTEXT16
LLBL( G3TP2NRR_2 ):

    PREFETCHW ( REGOFF(32, EDX) )	/* prefetch 2 vertices ahead         */

    MOVQ      ( REGIND(EAX), MM4 )	/* x1              | x0              */
    MOVQ      ( REGOFF(8, EAX), MM5 )	/* x3              | x2              */

    ADD_L     ( EDI, EAX )		/* next vertex                       */
    PREFETCH  ( REGIND(EAX) )

    PFMUL     ( MM0, MM4 )		/* x1*m11          | x0*m00          */
    MOVQ      ( MM5, MM6 )		/* x3              | x2              */

    ADD_L     ( CONST(16), EDX )	/* next r (stores below use -16/-8)  */
    PUNPCKHDQ ( MM6, MM6 )		/* x3              | x3              */

    PFMUL     ( MM1, MM6 )		/* x3*m31          | x3*m30          */
    PFADD     ( MM4, MM6 )		/* x1*m11+x3*m31   | x0*m00+x3*m30   */

    MOVQ      ( MM6, REGOFF(-16, EDX) )	/* write r0, r1                      */
    MOVQ      ( MM5, REGOFF(-8, EDX) )	/* write r2, r3 (x2, x3 unchanged)   */

    DEC_L     ( ESI )			/* decrement vertex counter          */

    JNZ       ( LLBL( G3TP2NRR_2 ) )	/* cnt > 0 ? -> process next vertex  */

LLBL( G3TP2NRR_3 ):

    FEMMS				/* leave MMX/3DNow! state            */
    POP_L     ( EDI )
    POP_L     ( ESI )
    RET
511
512
513
514
/*
 * _mesa_3dnow_transform_points4_identity
 *
 * Copies every 4-component float vertex of ARG_SOURCE to ARG_DEST
 * unchanged (r0..r3 = x0..x3).  The matrix argument is not read at all.
 *
 * Before the loop the destination header is updated: V4F_SIZE = 4,
 * V4F_FLAGS |= VEC_SIZE_4, V4F_COUNT = source V4F_COUNT.  Input vertices
 * are read V4F_STRIDE bytes apart; output vertices are written 16 bytes
 * apart starting at the destination's V4F_START.
 *
 * Loop registers:
 *     EAX = current source vertex      EDX = current destination vertex
 *     ESI = vertices left to process   EDI = source stride in bytes
 * ESI and EDI are saved and restored; EAX, ECX, EDX, the flags and
 * MM0, MM1 are clobbered.
 *
 * The ARG_* operands must be read while only ESI has been pushed.
 * NOTE(review): presumably FRAME_OFFSET (4) tells the ARG_* macros in
 * xform_args.h about that single push -- confirm there.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_3dnow_transform_points4_identity )
HIDDEN(_mesa_3dnow_transform_points4_identity)
GLNAME( _mesa_3dnow_transform_points4_identity ):

    PUSH_L    ( ESI )

    /* The matrix pointer is deliberately not loaded: nothing below uses it. */
    MOV_L     ( ARG_DEST, ECX )
    MOV_L     ( ARG_SOURCE, EAX )
    MOV_L     ( CONST(4), REGOFF(V4F_SIZE, ECX) )		/* dest size = 4       */
    OR_B      ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) )	/* dest flags |= SIZE_4 */
    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )
    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )			/* dest count = src count */

    PUSH_L    ( EDI )			/* ARG_* must not be used past here  */

    MOV_L     ( REGOFF(V4F_START, ECX), EDX )	/* EDX = first output vertex */
    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )	/* ESI = vertex count        */
    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )	/* EDI = source stride       */
    MOV_L     ( REGOFF(V4F_START, EAX), EAX )	/* EAX = first input vertex  */

    TEST_L    ( ESI, ESI )
    JZ        ( LLBL( G3TPIR_2 ) )	/* nothing to do for count == 0      */

ALIGNTEXT16
LLBL( G3TPIR_1 ):

    PREFETCHW ( REGOFF(32, EDX) )	/* prefetch 2 vertices ahead         */

    MOVQ      ( REGIND(EAX), MM0 )	/* x1              | x0              */
    MOVQ      ( REGOFF(8, EAX), MM1 )	/* x3              | x2              */

    ADD_L     ( EDI, EAX )		/* next vertex                       */
    PREFETCH  ( REGIND(EAX) )

    ADD_L     ( CONST(16), EDX )	/* next r (stores below use -16/-8)  */
    MOVQ      ( MM0, REGOFF(-16, EDX) )	/* r1              | r0              */

    MOVQ      ( MM1, REGOFF(-8, EDX) )	/* r3              | r2              */

    DEC_L     ( ESI )			/* decrement vertex counter          */
    JNZ       ( LLBL( G3TPIR_1 ) )	/* cnt > 0 ? -> process next vertex  */

LLBL( G3TPIR_2 ):

    FEMMS				/* leave MMX/3DNow! state            */
    POP_L     ( EDI )
    POP_L     ( ESI )
    RET
#endif /* USE_3DNOW_ASM */

/* Emitted even when USE_3DNOW_ASM is off: an empty .note.GNU-stack section
 * tells the GNU linker that this object does not need an executable stack.
 */
#if defined (__ELF__) && defined (__linux__)
	.section .note.GNU-stack,"",%progbits
#endif
571