1
2/*
3 * Mesa 3-D graphics library
4 * Version:  3.5
5 *
6 * Copyright (C) 1999-2001  Brian Paul   All Rights Reserved.
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and associated documentation files (the "Software"),
10 * to deal in the Software without restriction, including without limitation
11 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 * and/or sell copies of the Software, and to permit persons to whom the
13 * Software is furnished to do so, subject to the following conditions:
14 *
15 * The above copyright notice and this permission notice shall be included
16 * in all copies or substantial portions of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
21 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
22 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
23 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24 */
25
26/*
27 * NOTE: Avoid using spaces in between '(' ')' and arguments, especially
28 * with macros like CONST, LLBL that expand to CONCAT(...).  Putting spaces
29 * in there will break the build on some platforms.
30 */
31
32#include "assyntax.h"
33#include "matypes.h"
34#include "xform_args.h"
35
36	SEG_TEXT
37
/*
 * IEEE-754 single-precision bit patterns for 1.0 (0x3f800000) and 0.0.
 * They are not referenced by the routines visible in this file.
 */
#define FP_ONE		1065353216
#define FP_ZERO		0

/*
 * Memory operands for the four 32-bit components of the current source
 * element (addressed through ESI) and destination element (through EDI).
 */
#define SRC0		REGOFF(0, ESI)
#define SRC1		REGOFF(4, ESI)
#define SRC2		REGOFF(8, ESI)
#define SRC3		REGOFF(12, ESI)
#define DST0		REGOFF(0, EDI)
#define DST1		REGOFF(4, EDI)
#define DST2		REGOFF(8, EDI)
#define DST3		REGOFF(12, EDI)
/*
 * Memory operands for the sixteen 32-bit matrix elements, addressed
 * through EDX; MATn is the element at byte offset 4*n.
 */
#define MAT0		REGOFF(0, EDX)
#define MAT1		REGOFF(4, EDX)
#define MAT2		REGOFF(8, EDX)
#define MAT3		REGOFF(12, EDX)
#define MAT4		REGOFF(16, EDX)
#define MAT5		REGOFF(20, EDX)
#define MAT6		REGOFF(24, EDX)
#define MAT7		REGOFF(28, EDX)
#define MAT8		REGOFF(32, EDX)
#define MAT9		REGOFF(36, EDX)
#define MAT10		REGOFF(40, EDX)
#define MAT11		REGOFF(44, EDX)
#define MAT12		REGOFF(48, EDX)
#define MAT13		REGOFF(52, EDX)
#define MAT14		REGOFF(56, EDX)
#define MAT15		REGOFF(60, EDX)
65
66
/*
 * _mesa_x86_transform_points4_general
 *
 * Transform every 4-component point of the source vector by a full 4x4
 * matrix using x87 arithmetic.  For each element:
 *
 *	dst[0] = src[0]*m[0] + src[1]*m[4] + src[2]*m[8]  + src[3]*m[12]
 *	dst[1] = src[0]*m[1] + src[1]*m[5] + src[2]*m[9]  + src[3]*m[13]
 *	dst[2] = src[0]*m[2] + src[1]*m[6] + src[2]*m[10] + src[3]*m[14]
 *	dst[3] = src[0]*m[3] + src[1]*m[7] + src[2]*m[11] + src[3]*m[15]
 *
 * Arguments are fetched through ARG_SOURCE, ARG_DEST and ARG_MATRIX
 * (xform_args.h); FRAME_OFFSET is the number of bytes pushed here and is
 * presumably what those macros use to locate the arguments.
 *
 * Register roles inside the loop:
 *	ESI = current source element (advanced by the source stride)
 *	EDI = current destination element (advanced by 16 bytes)
 *	EDX = matrix
 *	EAX = source stride in bytes
 *	ECX = end of destination data (start + count * 16)
 *
 * The destination's flags, count and size are written before the
 * empty-source check, so an empty source leaves the destination with
 * count 0 instead of whatever count it held before.
 *
 * Clobbers EAX, ECX, EDX and the flags; ESI and EDI are saved/restored.
 * Up to eight x87 stack slots are used; the stack is balanced on exit.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points4_general )
HIDDEN(_mesa_x86_transform_points4_general)
GLNAME( _mesa_x86_transform_points4_general ):

#define FRAME_OFFSET 8
	PUSH_L( ESI )
	PUSH_L( EDI )

	MOV_L( ARG_SOURCE, ESI )
	MOV_L( ARG_DEST, EDI )

	MOV_L( ARG_MATRIX, EDX )
	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )

	/* Publish the destination metadata even when there is nothing to do. */
	OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )
	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
	MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )

	TEST_L( ECX, ECX )
	JZ( LLBL(x86_p4_gr_done) )

	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )

	SHL_L( CONST(4), ECX )		/* count * 16 bytes per output */
	MOV_L( REGOFF(V4F_START, ESI), ESI )

	MOV_L( REGOFF(V4F_START, EDI), EDI )
	ADD_L( EDI, ECX )		/* ECX = end of destination */

ALIGNTEXT16
LLBL(x86_p4_gr_loop):

	/* src[0] * first matrix column-of-four starts the accumulators */
	FLD_S( SRC0 )			/* F4 */
	FMUL_S( MAT0 )
	FLD_S( SRC0 )			/* F5 F4 */
	FMUL_S( MAT1 )
	FLD_S( SRC0 )			/* F6 F5 F4 */
	FMUL_S( MAT2 )
	FLD_S( SRC0 )			/* F7 F6 F5 F4 */
	FMUL_S( MAT3 )

	FLD_S( SRC1 )			/* F0 F7 F6 F5 F4 */
	FMUL_S( MAT4 )
	FLD_S( SRC1 )			/* F1 F0 F7 F6 F5 F4 */
	FMUL_S( MAT5 )
	FLD_S( SRC1 )			/* F2 F1 F0 F7 F6 F5 F4 */
	FMUL_S( MAT6 )
	FLD_S( SRC1 )			/* F3 F2 F1 F0 F7 F6 F5 F4 */
	FMUL_S( MAT7 )

	/* fold the four products into the four accumulators */
	FXCH( ST(3) )			/* F0 F2 F1 F3 F7 F6 F5 F4 */
	FADDP( ST0, ST(7) )		/* F2 F1 F3 F7 F6 F5 F4 */
	FXCH( ST(1) )			/* F1 F2 F3 F7 F6 F5 F4 */
	FADDP( ST0, ST(5) )		/* F2 F3 F7 F6 F5 F4 */
	FADDP( ST0, ST(3) )		/* F3 F7 F6 F5 F4 */
	FADDP( ST0, ST(1) )		/* F7 F6 F5 F4 */

	FLD_S( SRC2 )			/* F0 F7 F6 F5 F4 */
	FMUL_S( MAT8 )
	FLD_S( SRC2 )			/* F1 F0 F7 F6 F5 F4 */
	FMUL_S( MAT9 )
	FLD_S( SRC2 )			/* F2 F1 F0 F7 F6 F5 F4 */
	FMUL_S( MAT10 )
	FLD_S( SRC2 )			/* F3 F2 F1 F0 F7 F6 F5 F4 */
	FMUL_S( MAT11 )

	FXCH( ST(3) )			/* F0 F2 F1 F3 F7 F6 F5 F4 */
	FADDP( ST0, ST(7) )		/* F2 F1 F3 F7 F6 F5 F4 */
	FXCH( ST(1) )			/* F1 F2 F3 F7 F6 F5 F4 */
	FADDP( ST0, ST(5) )		/* F2 F3 F7 F6 F5 F4 */
	FADDP( ST0, ST(3) )		/* F3 F7 F6 F5 F4 */
	FADDP( ST0, ST(1) )		/* F7 F6 F5 F4 */

	FLD_S( SRC3 )			/* F0 F7 F6 F5 F4 */
	FMUL_S( MAT12 )
	FLD_S( SRC3 )			/* F1 F0 F7 F6 F5 F4 */
	FMUL_S( MAT13 )
	FLD_S( SRC3 )			/* F2 F1 F0 F7 F6 F5 F4 */
	FMUL_S( MAT14 )
	FLD_S( SRC3 )			/* F3 F2 F1 F0 F7 F6 F5 F4 */
	FMUL_S( MAT15 )

	FXCH( ST(3) )			/* F0 F2 F1 F3 F7 F6 F5 F4 */
	FADDP( ST0, ST(7) )		/* F2 F1 F3 F7 F6 F5 F4 */
	FXCH( ST(1) )			/* F1 F2 F3 F7 F6 F5 F4 */
	FADDP( ST0, ST(5) )		/* F2 F3 F7 F6 F5 F4 */
	FADDP( ST0, ST(3) )		/* F3 F7 F6 F5 F4 */
	FADDP( ST0, ST(1) )		/* F7 F6 F5 F4 */

	/* all source reads are done; store the results in component order */
	FXCH( ST(3) )			/* F4 F6 F5 F7 */
	FSTP_S( DST0 )			/* F6 F5 F7 */
	FXCH( ST(1) )			/* F5 F6 F7 */
	FSTP_S( DST1 )			/* F6 F7 */
	FSTP_S( DST2 )			/* F7 */
	FSTP_S( DST3 )			/* */

	ADD_L( CONST(16), EDI )
	ADD_L( EAX, ESI )
	CMP_L( ECX, EDI )
	JNE( LLBL(x86_p4_gr_loop) )

LLBL(x86_p4_gr_done):

	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET
177
178
179
180
/*
 * _mesa_x86_transform_points4_perspective
 *
 * Transform every 4-component point of the source vector by a matrix of
 * which only elements 0, 5, 8, 9, 10 and 14 are read.  For each element:
 *
 *	dst[0] = src[0]*m[0]  + src[2]*m[8]
 *	dst[1] = src[1]*m[5]  + src[2]*m[9]
 *	dst[2] = src[2]*m[10] + src[3]*m[14]
 *	dst[3] = -src[2]	(sign bit flipped with an integer XOR)
 *
 * Arguments are fetched through ARG_SOURCE, ARG_DEST and ARG_MATRIX
 * (xform_args.h); FRAME_OFFSET is the number of bytes pushed here and is
 * presumably what those macros use to locate the arguments.
 *
 * Register roles inside the loop:
 *	ESI = current source element (advanced by the source stride)
 *	EDI = current destination element (advanced by 16 bytes)
 *	EDX = matrix
 *	EAX = source stride in bytes
 *	ECX = end of destination data (start + count * 16)
 *	EBX = scratch for the negated src[2]
 *
 * The destination's flags, count and size are written before the
 * empty-source check, so an empty source leaves the destination with
 * count 0 instead of whatever count it held before.
 *
 * Clobbers EAX, ECX, EDX and the flags; ESI, EDI and EBX are
 * saved/restored.  The x87 stack is balanced on exit.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points4_perspective )
HIDDEN(_mesa_x86_transform_points4_perspective)
GLNAME( _mesa_x86_transform_points4_perspective ):

#define FRAME_OFFSET 12
	PUSH_L( ESI )
	PUSH_L( EDI )
	PUSH_L( EBX )

	MOV_L( ARG_SOURCE, ESI )
	MOV_L( ARG_DEST, EDI )

	MOV_L( ARG_MATRIX, EDX )
	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )

	/* Publish the destination metadata even when there is nothing to do. */
	OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )
	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
	MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )

	TEST_L( ECX, ECX )
	JZ( LLBL(x86_p4_pr_done) )

	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )

	SHL_L( CONST(4), ECX )		/* count * 16 bytes per output */
	MOV_L( REGOFF(V4F_START, ESI), ESI )

	MOV_L( REGOFF(V4F_START, EDI), EDI )
	ADD_L( EDI, ECX )		/* ECX = end of destination */

ALIGNTEXT16
LLBL(x86_p4_pr_loop):

	FLD_S( SRC0 )			/* F4 */
	FMUL_S( MAT0 )

	FLD_S( SRC1 )			/* F5 F4 */
	FMUL_S( MAT5 )

	FLD_S( SRC2 )			/* F0 F5 F4 */
	FMUL_S( MAT8 )
	FLD_S( SRC2 )			/* F1 F0 F5 F4 */
	FMUL_S( MAT9 )
	FLD_S( SRC2 )			/* F6 F1 F0 F5 F4 */
	FMUL_S( MAT10 )

	FXCH( ST(2) )			/* F0 F1 F6 F5 F4 */
	FADDP( ST0, ST(4) )		/* F1 F6 F5 F4 */
	FADDP( ST0, ST(2) )		/* F6 F5 F4 */

	FLD_S( SRC3 )			/* F2 F6 F5 F4 */
	FMUL_S( MAT14 )

	FADDP( ST0, ST(1) )		/* F6 F5 F4 */

	/* fetch src[2] before any store to the destination element */
	MOV_L( SRC2, EBX )
	XOR_L( CONST(-2147483648), EBX )/* change sign */

	FXCH( ST(2) )			/* F4 F5 F6 */
	FSTP_S( DST0 )			/* F5 F6 */
	FSTP_S( DST1 )			/* F6 */
	FSTP_S( DST2 )			/* */
	MOV_L( EBX, DST3 )

	ADD_L( CONST(16), EDI )
	ADD_L( EAX, ESI )
	CMP_L( ECX, EDI )
	JNE( LLBL(x86_p4_pr_loop) )

LLBL(x86_p4_pr_done):

	POP_L( EBX )
	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET
260
261
262
263
/*
 * _mesa_x86_transform_points4_3d
 *
 * Transform every 4-component point of the source vector by a matrix of
 * which elements 3, 7, 11 and 15 are never read.  For each element:
 *
 *	dst[0] = src[0]*m[0] + src[1]*m[4] + src[2]*m[8]  + src[3]*m[12]
 *	dst[1] = src[0]*m[1] + src[1]*m[5] + src[2]*m[9]  + src[3]*m[13]
 *	dst[2] = src[0]*m[2] + src[1]*m[6] + src[2]*m[10] + src[3]*m[14]
 *	dst[3] = src[3]		(copied bit-for-bit through EBX)
 *
 * Arguments are fetched through ARG_SOURCE, ARG_DEST and ARG_MATRIX
 * (xform_args.h); FRAME_OFFSET is the number of bytes pushed here and is
 * presumably what those macros use to locate the arguments.
 *
 * Register roles inside the loop:
 *	ESI = current source element (advanced by the source stride)
 *	EDI = current destination element (advanced by 16 bytes)
 *	EDX = matrix
 *	EAX = source stride in bytes
 *	ECX = end of destination data (start + count * 16)
 *	EBX = scratch for the copied src[3]
 *
 * The destination's flags, count and size are written before the
 * empty-source check, so an empty source leaves the destination with
 * count 0 instead of whatever count it held before.
 *
 * Clobbers EAX, ECX, EDX and the flags; ESI, EDI and EBX are
 * saved/restored.  The x87 stack is balanced on exit.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points4_3d )
HIDDEN(_mesa_x86_transform_points4_3d)
GLNAME( _mesa_x86_transform_points4_3d ):

#define FRAME_OFFSET 12
	PUSH_L( ESI )
	PUSH_L( EDI )
	PUSH_L( EBX )

	MOV_L( ARG_SOURCE, ESI )
	MOV_L( ARG_DEST, EDI )

	MOV_L( ARG_MATRIX, EDX )
	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )

	/* Publish the destination metadata even when there is nothing to do. */
	OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )
	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
	MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )

	TEST_L( ECX, ECX )
	JZ( LLBL(x86_p4_3dr_done) )

	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )

	SHL_L( CONST(4), ECX )		/* count * 16 bytes per output */
	MOV_L( REGOFF(V4F_START, ESI), ESI )

	MOV_L( REGOFF(V4F_START, EDI), EDI )
	ADD_L( EDI, ECX )		/* ECX = end of destination */

ALIGNTEXT16
LLBL(x86_p4_3dr_loop):

	FLD_S( SRC0 )			/* F4 */
	FMUL_S( MAT0 )
	FLD_S( SRC0 )			/* F5 F4 */
	FMUL_S( MAT1 )
	FLD_S( SRC0 )			/* F6 F5 F4 */
	FMUL_S( MAT2 )

	FLD_S( SRC1 )			/* F0 F6 F5 F4 */
	FMUL_S( MAT4 )
	FLD_S( SRC1 )			/* F1 F0 F6 F5 F4 */
	FMUL_S( MAT5 )
	FLD_S( SRC1 )			/* F2 F1 F0 F6 F5 F4 */
	FMUL_S( MAT6 )

	/* fold the three products into the three accumulators */
	FXCH( ST(2) )			/* F0 F1 F2 F6 F5 F4 */
	FADDP( ST0, ST(5) )		/* F1 F2 F6 F5 F4 */
	FADDP( ST0, ST(3) )		/* F2 F6 F5 F4 */
	FADDP( ST0, ST(1) )		/* F6 F5 F4 */

	FLD_S( SRC2 )			/* F0 F6 F5 F4 */
	FMUL_S( MAT8 )
	FLD_S( SRC2 )			/* F1 F0 F6 F5 F4 */
	FMUL_S( MAT9 )
	FLD_S( SRC2 )			/* F2 F1 F0 F6 F5 F4 */
	FMUL_S( MAT10 )

	FXCH( ST(2) )			/* F0 F1 F2 F6 F5 F4 */
	FADDP( ST0, ST(5) )		/* F1 F2 F6 F5 F4 */
	FADDP( ST0, ST(3) )		/* F2 F6 F5 F4 */
	FADDP( ST0, ST(1) )		/* F6 F5 F4 */

	FLD_S( SRC3 )			/* F0 F6 F5 F4 */
	FMUL_S( MAT12 )
	FLD_S( SRC3 )			/* F1 F0 F6 F5 F4 */
	FMUL_S( MAT13 )
	FLD_S( SRC3 )			/* F2 F1 F0 F6 F5 F4 */
	FMUL_S( MAT14 )

	FXCH( ST(2) )			/* F0 F1 F2 F6 F5 F4 */
	FADDP( ST0, ST(5) )		/* F1 F2 F6 F5 F4 */
	FADDP( ST0, ST(3) )		/* F2 F6 F5 F4 */
	FADDP( ST0, ST(1) )		/* F6 F5 F4 */

	/* fetch src[3] before any store to the destination element */
	MOV_L( SRC3, EBX )

	FXCH( ST(2) )			/* F4 F5 F6 */
	FSTP_S( DST0 )			/* F5 F6 */
	FSTP_S( DST1 )			/* F6 */
	FSTP_S( DST2 )			/* */
	MOV_L( EBX, DST3 )

	ADD_L( CONST(16), EDI )
	ADD_L( EAX, ESI )
	CMP_L( ECX, EDI )
	JNE( LLBL(x86_p4_3dr_loop) )

LLBL(x86_p4_3dr_done):

	POP_L( EBX )
	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET
363
364
365
366
/*
 * _mesa_x86_transform_points4_3d_no_rot
 *
 * Transform every 4-component point of the source vector by a matrix of
 * which only elements 0, 5, 10, 12, 13 and 14 are read.  For each
 * element:
 *
 *	dst[0] = src[0]*m[0]  + src[3]*m[12]
 *	dst[1] = src[1]*m[5]  + src[3]*m[13]
 *	dst[2] = src[2]*m[10] + src[3]*m[14]
 *	dst[3] = src[3]		(copied bit-for-bit through EBX)
 *
 * Arguments are fetched through ARG_SOURCE, ARG_DEST and ARG_MATRIX
 * (xform_args.h); FRAME_OFFSET is the number of bytes pushed here and is
 * presumably what those macros use to locate the arguments.
 *
 * Register roles inside the loop:
 *	ESI = current source element (advanced by the source stride)
 *	EDI = current destination element (advanced by 16 bytes)
 *	EDX = matrix
 *	EAX = source stride in bytes
 *	ECX = end of destination data (start + count * 16)
 *	EBX = scratch for the copied src[3]
 *
 * The destination's flags, count and size are written before the
 * empty-source check, so an empty source leaves the destination with
 * count 0 instead of whatever count it held before.
 *
 * Clobbers EAX, ECX, EDX and the flags; ESI, EDI and EBX are
 * saved/restored.  The x87 stack is balanced on exit.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_x86_transform_points4_3d_no_rot)
HIDDEN(_mesa_x86_transform_points4_3d_no_rot)
GLNAME(_mesa_x86_transform_points4_3d_no_rot):

#define FRAME_OFFSET 12
	PUSH_L( ESI )
	PUSH_L( EDI )
	PUSH_L( EBX )

	MOV_L( ARG_SOURCE, ESI )
	MOV_L( ARG_DEST, EDI )

	MOV_L( ARG_MATRIX, EDX )
	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )

	/* Publish the destination metadata even when there is nothing to do. */
	OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )
	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
	MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )

	TEST_L( ECX, ECX )
	JZ( LLBL(x86_p4_3dnrr_done) )

	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )

	SHL_L( CONST(4), ECX )		/* count * 16 bytes per output */
	MOV_L( REGOFF(V4F_START, ESI), ESI )

	MOV_L( REGOFF(V4F_START, EDI), EDI )
	ADD_L( EDI, ECX )		/* ECX = end of destination */

ALIGNTEXT16
LLBL(x86_p4_3dnrr_loop):

	FLD_S( SRC0 )			/* F4 */
	FMUL_S( MAT0 )

	FLD_S( SRC1 )			/* F5 F4 */
	FMUL_S( MAT5 )

	FLD_S( SRC2 )			/* F6 F5 F4 */
	FMUL_S( MAT10 )

	FLD_S( SRC3 )			/* F0 F6 F5 F4 */
	FMUL_S( MAT12 )
	FLD_S( SRC3 )			/* F1 F0 F6 F5 F4 */
	FMUL_S( MAT13 )
	FLD_S( SRC3 )			/* F2 F1 F0 F6 F5 F4 */
	FMUL_S( MAT14 )

	/* fold the three src[3] products into the three accumulators */
	FXCH( ST(2) )			/* F0 F1 F2 F6 F5 F4 */
	FADDP( ST0, ST(5) )		/* F1 F2 F6 F5 F4 */
	FADDP( ST0, ST(3) )		/* F2 F6 F5 F4 */
	FADDP( ST0, ST(1) )		/* F6 F5 F4 */

	/* fetch src[3] before any store to the destination element */
	MOV_L( SRC3, EBX )

	FXCH( ST(2) )			/* F4 F5 F6 */
	FSTP_S( DST0 )			/* F5 F6 */
	FSTP_S( DST1 )			/* F6 */
	FSTP_S( DST2 )			/* */
	MOV_L( EBX, DST3 )

	ADD_L( CONST(16), EDI )
	ADD_L( EAX, ESI )
	CMP_L( ECX, EDI )
	JNE( LLBL(x86_p4_3dnrr_loop) )

LLBL(x86_p4_3dnrr_done):

	POP_L( EBX )
	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET
444
445
446
447
/*
 * _mesa_x86_transform_points4_2d
 *
 * Transform every 4-component point of the source vector by a matrix of
 * which only elements 0, 1, 4, 5, 12 and 13 are read.  For each element:
 *
 *	dst[0] = src[0]*m[0] + src[1]*m[4] + src[3]*m[12]
 *	dst[1] = src[0]*m[1] + src[1]*m[5] + src[3]*m[13]
 *	dst[2] = src[2]		(copied bit-for-bit through EBX)
 *	dst[3] = src[3]		(copied bit-for-bit through EBP)
 *
 * Arguments are fetched through ARG_SOURCE, ARG_DEST and ARG_MATRIX
 * (xform_args.h); FRAME_OFFSET is the number of bytes pushed here and is
 * presumably what those macros use to locate the arguments.
 *
 * Register roles inside the loop:
 *	ESI = current source element (advanced by the source stride)
 *	EDI = current destination element (advanced by 16 bytes)
 *	EDX = matrix
 *	EAX = source stride in bytes
 *	ECX = end of destination data (start + count * 16)
 *	EBX, EBP = scratch for the copied src[2] and src[3]
 *
 * The destination's flags, count and size are written before the
 * empty-source check, so an empty source leaves the destination with
 * count 0 instead of whatever count it held before.
 *
 * Clobbers EAX, ECX, EDX and the flags; ESI, EDI, EBX and EBP are
 * saved/restored.  The x87 stack is balanced on exit.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points4_2d )
HIDDEN(_mesa_x86_transform_points4_2d)
GLNAME( _mesa_x86_transform_points4_2d ):

#define FRAME_OFFSET 16
	PUSH_L( ESI )
	PUSH_L( EDI )
	PUSH_L( EBX )
	PUSH_L( EBP )

	MOV_L( ARG_SOURCE, ESI )
	MOV_L( ARG_DEST, EDI )

	MOV_L( ARG_MATRIX, EDX )
	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )

	/* Publish the destination metadata even when there is nothing to do. */
	OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )
	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
	MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )

	TEST_L( ECX, ECX )
	JZ( LLBL(x86_p4_2dr_done) )

	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )

	SHL_L( CONST(4), ECX )		/* count * 16 bytes per output */
	MOV_L( REGOFF(V4F_START, ESI), ESI )

	MOV_L( REGOFF(V4F_START, EDI), EDI )
	ADD_L( EDI, ECX )		/* ECX = end of destination */

ALIGNTEXT16
LLBL(x86_p4_2dr_loop):

	FLD_S( SRC0 )			/* F4 */
	FMUL_S( MAT0 )
	FLD_S( SRC0 )			/* F5 F4 */
	FMUL_S( MAT1 )

	FLD_S( SRC1 )			/* F0 F5 F4 */
	FMUL_S( MAT4 )
	FLD_S( SRC1 )			/* F1 F0 F5 F4 */
	FMUL_S( MAT5 )

	/* fold the two products into the two accumulators */
	FXCH( ST(1) )			/* F0 F1 F5 F4 */
	FADDP( ST0, ST(3) )		/* F1 F5 F4 */
	FADDP( ST0, ST(1) )		/* F5 F4 */

	FLD_S( SRC3 )			/* F0 F5 F4 */
	FMUL_S( MAT12 )
	FLD_S( SRC3 )			/* F1 F0 F5 F4 */
	FMUL_S( MAT13 )

	FXCH( ST(1) )			/* F0 F1 F5 F4 */
	FADDP( ST0, ST(3) )		/* F1 F5 F4 */
	FADDP( ST0, ST(1) )		/* F5 F4 */

	/* fetch src[2]/src[3] before any store to the destination element */
	MOV_L( SRC2, EBX )
	MOV_L( SRC3, EBP )

	FXCH( ST(1) )			/* F4 F5 */
	FSTP_S( DST0 )			/* F5 */
	FSTP_S( DST1 )			/* */
	MOV_L( EBX, DST2 )
	MOV_L( EBP, DST3 )

	ADD_L( CONST(16), EDI )
	ADD_L( EAX, ESI )
	CMP_L( ECX, EDI )
	JNE( LLBL(x86_p4_2dr_loop) )

LLBL(x86_p4_2dr_done):

	POP_L( EBP )
	POP_L( EBX )
	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET
530
531
532
533
/*
 * _mesa_x86_transform_points4_2d_no_rot
 *
 * Transform every 4-component point of the source vector by a matrix of
 * which only elements 0, 5, 12 and 13 are read.  For each element:
 *
 *	dst[0] = src[0]*m[0] + src[3]*m[12]
 *	dst[1] = src[1]*m[5] + src[3]*m[13]
 *	dst[2] = src[2]		(copied bit-for-bit through EBX)
 *	dst[3] = src[3]		(copied bit-for-bit through EBP)
 *
 * Arguments are fetched through ARG_SOURCE, ARG_DEST and ARG_MATRIX
 * (xform_args.h); FRAME_OFFSET is the number of bytes pushed here and is
 * presumably what those macros use to locate the arguments.
 *
 * Register roles inside the loop:
 *	ESI = current source element (advanced by the source stride)
 *	EDI = current destination element (advanced by 16 bytes)
 *	EDX = matrix
 *	EAX = source stride in bytes
 *	ECX = end of destination data (start + count * 16)
 *	EBX, EBP = scratch for the copied src[2] and src[3]
 *
 * The destination's flags, count and size are written before the
 * empty-source check, so an empty source leaves the destination with
 * count 0 instead of whatever count it held before.
 *
 * Clobbers EAX, ECX, EDX and the flags; ESI, EDI, EBX and EBP are
 * saved/restored.  The x87 stack is balanced on exit.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points4_2d_no_rot )
HIDDEN(_mesa_x86_transform_points4_2d_no_rot)
GLNAME( _mesa_x86_transform_points4_2d_no_rot ):

#define FRAME_OFFSET 16
	PUSH_L( ESI )
	PUSH_L( EDI )
	PUSH_L( EBX )
	PUSH_L( EBP )

	MOV_L( ARG_SOURCE, ESI )
	MOV_L( ARG_DEST, EDI )

	MOV_L( ARG_MATRIX, EDX )
	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )

	/* Publish the destination metadata even when there is nothing to do. */
	OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )
	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
	MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )

	TEST_L( ECX, ECX )
	JZ( LLBL(x86_p4_2dnrr_done) )

	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )

	SHL_L( CONST(4), ECX )		/* count * 16 bytes per output */
	MOV_L( REGOFF(V4F_START, ESI), ESI )

	MOV_L( REGOFF(V4F_START, EDI), EDI )
	ADD_L( EDI, ECX )		/* ECX = end of destination */

ALIGNTEXT16
LLBL(x86_p4_2dnrr_loop):

	FLD_S( SRC0 )			/* F4 */
	FMUL_S( MAT0 )

	FLD_S( SRC1 )			/* F5 F4 */
	FMUL_S( MAT5 )

	FLD_S( SRC3 )			/* F0 F5 F4 */
	FMUL_S( MAT12 )
	FLD_S( SRC3 )			/* F1 F0 F5 F4 */
	FMUL_S( MAT13 )

	/* fold the two src[3] products into the two accumulators */
	FXCH( ST(1) )			/* F0 F1 F5 F4 */
	FADDP( ST0, ST(3) )		/* F1 F5 F4 */
	FADDP( ST0, ST(1) )		/* F5 F4 */

	/* fetch src[2]/src[3] before any store to the destination element */
	MOV_L( SRC2, EBX )
	MOV_L( SRC3, EBP )

	FXCH( ST(1) )			/* F4 F5 */
	FSTP_S( DST0 )			/* F5 */
	FSTP_S( DST1 )			/* */
	MOV_L( EBX, DST2 )
	MOV_L( EBP, DST3 )

	ADD_L( CONST(16), EDI )
	ADD_L( EAX, ESI )
	CMP_L( ECX, EDI )
	JNE( LLBL(x86_p4_2dnrr_loop) )

LLBL(x86_p4_2dnrr_done):

	POP_L( EBP )
	POP_L( EBX )
	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET
608
609
610
611
/*
 * _mesa_x86_transform_points4_identity
 *
 * Copy every 4-component point of the source vector to the destination
 * unchanged, 32 bits at a time through integer registers.  The matrix
 * argument is never read.
 *
 * Arguments are fetched through ARG_SOURCE and ARG_DEST (xform_args.h);
 * FRAME_OFFSET is the number of bytes pushed here and is presumably
 * what those macros use to locate the arguments.
 *
 * Register roles inside the loop:
 *	ESI = current source element (advanced by the source stride)
 *	EDI = current destination element (advanced by 16 bytes)
 *	EAX = source stride in bytes
 *	ECX = end of destination data (start + count * 16)
 *	EBX, EDX = copy scratch
 *
 * The destination's flags, count and size are written before the
 * empty-source check, so an empty source leaves the destination with
 * count 0 instead of whatever count it held before.  When source and
 * destination data start at the same address the copy is skipped.
 *
 * Clobbers EAX, ECX, EDX and the flags; ESI, EDI and EBX are
 * saved/restored.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points4_identity )
HIDDEN(_mesa_x86_transform_points4_identity)
GLNAME( _mesa_x86_transform_points4_identity ):

#define FRAME_OFFSET 12
	PUSH_L( ESI )
	PUSH_L( EDI )
	PUSH_L( EBX )

	MOV_L( ARG_SOURCE, ESI )
	MOV_L( ARG_DEST, EDI )

	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )

	/* Publish the destination metadata even when there is nothing to do. */
	OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )
	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
	MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )

	TEST_L( ECX, ECX )
	JZ( LLBL(x86_p4_ir_done) )

	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )

	SHL_L( CONST(4), ECX )		/* count * 16 bytes per output */
	MOV_L( REGOFF(V4F_START, ESI), ESI )

	MOV_L( REGOFF(V4F_START, EDI), EDI )
	ADD_L( EDI, ECX )		/* ECX = end of destination */

	/* same data start for source and destination: nothing to copy */
	CMP_L( ESI, EDI )
	JE( LLBL(x86_p4_ir_done) )

ALIGNTEXT16
LLBL(x86_p4_ir_loop):

	MOV_L( SRC0, EBX )
	MOV_L( SRC1, EDX )

	MOV_L( EBX, DST0 )
	MOV_L( EDX, DST1 )

	MOV_L( SRC2, EBX )
	MOV_L( SRC3, EDX )

	MOV_L( EBX, DST2 )
	MOV_L( EDX, DST3 )

	ADD_L( CONST(16), EDI )
	ADD_L( EAX, ESI )
	CMP_L( ECX, EDI )
	JNE( LLBL(x86_p4_ir_loop) )

LLBL(x86_p4_ir_done):

	POP_L( EBX )
	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET
674
/*
 * On Linux/ELF, emit an empty .note.GNU-stack section so that linking
 * this object does not cause the stack to be marked executable.
 */
#if defined (__ELF__) && defined (__linux__)
	.section .note.GNU-stack,"",%progbits
#endif
678