1
2/*
3 * Mesa 3-D graphics library
4 * Version:  5.1
5 *
6 * Copyright (C) 1999-2003  Brian Paul   All Rights Reserved.
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and associated documentation files (the "Software"),
10 * to deal in the Software without restriction, including without limitation
11 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 * and/or sell copies of the Software, and to permit persons to whom the
13 * Software is furnished to do so, subject to the following conditions:
14 *
15 * The above copyright notice and this permission notice shall be included
16 * in all copies or substantial portions of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
21 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
22 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
23 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24 */
25
26/*
27 * 3Dnow assembly code by Holger Waechtler
28 */
29
30#ifdef USE_3DNOW_ASM
31
32#include "assyntax.h"
33#include "matypes.h"
34#include "norm_args.h"
35
36        SEG_TEXT
37
/*
 * M(i): memory operand for the i-th 32-bit element of the array ECX points
 * to.  The routines that use M() load mat->inv into ECX first.  The argument
 * is parenthesized so that an expression argument is scaled as a whole.
 */
#define M(i)    REGOFF((i) * 4, ECX)

/*
 * STRIDE: memory operand at byte offset 12 from ESI.  The loops add it to the
 * input pointer to step to the next normal.
 * NOTE(review): presumably the stride field of the input vector struct that
 * ESI points to -- confirm the offset against matypes.h.
 */
#define STRIDE  REGOFF(12, ESI)
40
41
/*
 * _mesa_3dnow_transform_normalize_normals
 *
 * Pass 1 transforms each input normal (x0, x1, x2) with the floats at
 * mat->inv and writes the result to the destination array, 16 bytes apart:
 *     r0 = x0*m0 + x1*m1 + x2*m2
 *     r1 = x0*m4 + x1*m5 + x2*m6
 *     r2 = x0*m8 + x1*m9 + x2*m10
 * Pass 2 walks the destination array again and normalizes it in place:
 * with lengths != 0 every component is multiplied by lengths[i], otherwise
 * by a computed 1/sqrt(r0*r0 + r1*r1 + r2*r2).
 *
 * Stack arguments (ARG_* macros, relative to FRAME_OFFSET):
 *   ARG_MAT      matrix struct, MATRIX_INV is dereferenced
 *   ARG_SCALE    float scale factor
 *   ARG_IN       input vector struct (V4F_START, V4F_COUNT, STRIDE are read)
 *   ARG_LENGTHS  array of per-normal float factors, or 0
 *   ARG_DEST     output vector struct (V4F_COUNT written, V4F_START read)
 *
 * Registers: EAX = output pointer, EDX = input pointer, ECX = matrix,
 * EDI = lengths, ESI = input struct, EBP = remaining count.
 * EDI, ESI and EBP are saved and restored; EAX, ECX, EDX and MM0-MM7 are
 * clobbered.  Nothing is processed when in->count is 0.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_3dnow_transform_normalize_normals)
HIDDEN(_mesa_3dnow_transform_normalize_normals)
GLNAME(_mesa_3dnow_transform_normalize_normals):

#define FRAME_OFFSET 12

    PUSH_L     ( EDI )
    PUSH_L     ( ESI )
    PUSH_L     ( EBP )

    MOV_L      ( ARG_LENGTHS, EDI )
    MOV_L      ( ARG_IN, ESI )
    MOV_L      ( ARG_DEST, EAX )
    MOV_L      ( REGOFF(V4F_COUNT, ESI), EBP ) /*  dest->count = in->count   */
    MOV_L      ( EBP, REGOFF(V4F_COUNT, EAX) )
    MOV_L      ( REGOFF(V4F_START, ESI), EDX ) /*  in->start    */
    MOV_L      ( REGOFF(V4F_START, EAX), EAX ) /*  dest->start  */
    MOV_L      ( ARG_MAT, ECX )
    MOV_L      ( REGOFF(MATRIX_INV, ECX), ECX ) /*  mat->inv     */

    CMP_L      ( CONST(0), EBP )        /*  count == 0 ?  nothing to do    */
    JE         ( LLBL (G3TN_end) )

    FEMMS                               /*  EBP still holds in->count      */

    PUSH_L     ( EBP )
    PUSH_L     ( EAX )
    PUSH_L     ( EDX )                  /*  save counter & pointers for    */
                                        /*  the normalize pass             */
#undef  FRAME_OFFSET
#define FRAME_OFFSET 24                 /*  three more dwords on the stack */

    MOVQ       ( M(0), MM3 )            /*  m1              | m0           */
    MOVQ       ( M(4), MM4 )            /*  m5              | m4           */

    MOVD       ( M(2), MM5 )            /*                  | m2           */
    PUNPCKLDQ  ( M(6), MM5 )            /*  m6              | m2           */

    MOVQ       ( M(8), MM6 )            /*  m9              | m8           */
    MOVD       ( M(10), MM7 )           /*                  | m10          */

    /*
     * NOTE(review): the scale factor is folded into the matrix only when
     * lengths == 0, i.e. on the path whose second pass recomputes
     * 1/sqrt(r.r) and thereby cancels a uniform scale again.  The path that
     * multiplies by lengths[i] uses the unscaled matrix.  Confirm this
     * polarity against the C reference implementation.
     */
    CMP_L      ( CONST(0), EDI )        /*  lengths == 0 ?                 */
    JNE        ( LLBL (G3TN_scale_end ) )

    MOVD       ( ARG_SCALE, MM0 )       /*               | scale           */
    PUNPCKLDQ  ( MM0, MM0 )             /* scale         | scale           */

    PFMUL      ( MM0, MM3 )             /* scale * m1    | scale * m0      */
    PFMUL      ( MM0, MM4 )             /* scale * m5    | scale * m4      */
    PFMUL      ( MM0, MM5 )             /* scale * m6    | scale * m2      */
    PFMUL      ( MM0, MM6 )             /* scale * m9    | scale * m8      */
    PFMUL      ( MM0, MM7 )             /*               | scale * m10     */

ALIGNTEXT32
LLBL (G3TN_scale_end):
LLBL (G3TN_transform):                  /*  pass 1: transform one normal   */
    MOVQ       ( REGIND (EDX), MM0 )    /*  x1              | x0           */
    MOVD       ( REGOFF (8, EDX), MM2 ) /*                  | x2           */

    MOVQ       ( MM0, MM1 )             /*  x1              | x0           */
    PUNPCKLDQ  ( MM2, MM2 )             /*  x2              | x2           */

    PFMUL      ( MM3, MM0 )             /*  x1*m1           | x0*m0        */
    ADD_L      ( CONST(16), EAX )       /*  next r (stores use -16/-8)     */

    PREFETCHW  ( REGIND(EAX) )

    PFMUL      ( MM4, MM1 )             /*  x1*m5           | x0*m4        */
    PFACC      ( MM1, MM0 )             /*  x0*m4+x1*m5     | x0*m0+x1*m1  */

    PFMUL      ( MM5, MM2 )             /*  x2*m6           | x2*m2        */
    PFADD      ( MM2, MM0 )             /*  r1              | r0           */

    MOVQ       ( REGIND (EDX), MM1 )    /*  x1           | x0              */
    MOVQ       ( MM0, REGOFF(-16, EAX) ) /* write r0, r1                   */

    PFMUL      ( MM6, MM1 )             /* x1*m9         | x0*m8           */
    MOVD       ( REGOFF (8, EDX), MM2 ) /*               | x2              */

    PFMUL      ( MM7, MM2 )             /*               | x2*m10          */
    PFACC      ( MM1, MM1 )             /*  *not used*   | x0*m8+x1*m9     */

    PFADD      ( MM2, MM1 )             /*  *not used*   | r2              */
    ADD_L      ( STRIDE, EDX )          /*  next normal                    */

    PREFETCH   ( REGIND(EDX) )

    MOVD       ( MM1, REGOFF(-8, EAX) ) /*  write r2                       */
    SUB_L      ( CONST(1), EBP )        /*  decrement normal counter       */
    JNZ        ( LLBL (G3TN_transform) )


    POP_L      ( EDX )                  /*  end of transform ---           */
    POP_L      ( EAX )                  /*    now normalizing ...          */
    POP_L      ( EBP )

    CMP_L      ( CONST(0), EDI )        /*  lengths == 0 ?                 */
    JE         ( LLBL (G3TN_norm ) )    /*  calculate lengths              */


ALIGNTEXT32
LLBL (G3TN_norm_w_lengths):             /*  pass 2a: in place, lengths[i]  */

    PREFETCHW  ( REGOFF(12,EAX) )

    MOVQ       ( REGIND(EAX), MM0 )     /*  r1              | r0           */
    MOVD       ( REGOFF(8, EAX), MM1 )  /*                  | r2           */

    MOVD       ( REGIND (EDI), MM3 )    /*                  | lengths[i]   */
    PFMUL      ( MM3, MM1 )             /*                  | r2 * length  */

    PUNPCKLDQ  ( MM3, MM3 )             /*  lengths[i]      | lengths[i]   */
    PFMUL      ( MM3, MM0 )             /*  r1 * length     | r0 * length  */

    /* This pass only touches the output array, so the input pointer in    */
    /* EDX is not advanced here.                                           */
    ADD_L      ( CONST(4), EDI )        /*  next length                    */

    PREFETCH   ( REGIND(EDI) )

    MOVQ       ( MM0, REGIND(EAX) )     /*  write new r0, r1               */
    MOVD       ( MM1, REGOFF(8, EAX) )  /*  write new r2                   */

    ADD_L      ( CONST(16), EAX )       /*  next r                         */
    SUB_L      ( CONST(1), EBP )        /*  decrement normal counter       */

    JNZ        ( LLBL (G3TN_norm_w_lengths) )
    JMP        ( LLBL (G3TN_exit_3dnow) )

ALIGNTEXT32
LLBL (G3TN_norm):                       /*  pass 2b: in place, computed    */

    PREFETCHW  ( REGIND(EAX) )

    MOVQ       ( REGIND (EAX), MM0 )    /*  r1              | r0           */
    MOVD       ( REGOFF(8, EAX), MM1 )  /*                  | r2           */

    MOVQ       ( MM0, MM3 )             /*  r1              | r0           */
    MOVQ       ( MM1, MM4 )             /*                  | r2           */

    PFMUL      ( MM0, MM3 )             /*  r1*r1           | r0*r0        */
    ADD_L      ( CONST(16), EAX )       /*  next r                         */

    PFMUL      ( MM1, MM4 )             /*                  | r2*r2        */
    PFADD      ( MM4, MM3 )             /*  r1*r1           | r0*r0+r2*r2  */

    PFACC      ( MM3, MM3 )             /*  sum of squares in both halves  */
    PFRSQRT    ( MM3, MM5 )             /*  approx 1/sqrt (sum)            */

    MOVQ       ( MM5, MM4 )             /*  keep the first approximation   */
    PUNPCKLDQ  ( MM3, MM3 )

    /* The flags set by this SUB_L are consumed by the JNZ at the end of   */
    /* the loop; none of the MMX/3DNow! instructions in between touch them. */
    SUB_L      ( CONST(1), EBP )        /*  decrement normal counter       */
    PFMUL      ( MM5, MM5 )             /*  approximation squared          */

    PFRSQIT1   ( MM3, MM5 )             /*  reciprocal sqrt refinement 1   */
    PFRCPIT2   ( MM4, MM5 )             /*  refinement 2 -> 1/sqrt (sum)   */

    PFMUL      ( MM5, MM0 )             /*  r1 (normalized) | r0 (normal.) */

    MOVQ       ( MM0, REGOFF(-16, EAX) ) /*  write new r0, r1              */
    PFMUL      ( MM5, MM1 )             /*                  | r2 (normal.) */

    MOVD       ( MM1, REGOFF(-8, EAX) ) /*  write new r2                   */
    JNZ        ( LLBL (G3TN_norm) )

LLBL (G3TN_exit_3dnow):
    FEMMS

LLBL (G3TN_end):
    POP_L      ( EBP )
    POP_L      ( ESI )
    POP_L      ( EDI )
    RET
217
218
219
/*
 * _mesa_3dnow_transform_normalize_normals_no_rot
 *
 * Same job as the full transform+normalize routine, but only the three
 * matrix elements m0, m5 and m10 of mat->inv are used:
 *     t0 = x0*m0,  t1 = x1*m5,  t2 = x2*m10
 * The result is normalized in the same loop, either by multiplying with
 * lengths[i] (lengths != 0) or by a computed 1/sqrt(t0*t0 + t1*t1 + t2*t2),
 * and written to the destination array 16 bytes apart.
 *
 * Stack arguments: ARG_MAT, ARG_SCALE, ARG_IN, ARG_LENGTHS, ARG_DEST.
 * Registers: EAX = output pointer, EDX = input pointer, ECX = matrix,
 * EDI = lengths, ESI = input struct, EBP = remaining count.
 * EDI, ESI and EBP are saved and restored; EAX, ECX, EDX and MMX registers
 * are clobbered.  Nothing is processed when in->count is 0.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_3dnow_transform_normalize_normals_no_rot)
HIDDEN(_mesa_3dnow_transform_normalize_normals_no_rot)
GLNAME(_mesa_3dnow_transform_normalize_normals_no_rot):

#undef FRAME_OFFSET
#define FRAME_OFFSET 12

    PUSH_L     ( EDI )
    PUSH_L     ( ESI )
    PUSH_L     ( EBP )

    MOV_L      ( ARG_LENGTHS, EDI )
    MOV_L      ( ARG_IN, ESI )
    MOV_L      ( ARG_DEST, EAX )
    MOV_L      ( REGOFF(V4F_COUNT, ESI), EBP ) /*  dest->count = in->count   */
    MOV_L      ( EBP, REGOFF(V4F_COUNT, EAX) )
    MOV_L      ( ARG_MAT, ECX )
    MOV_L      ( REGOFF(V4F_START, EAX), EAX ) /*  dest->start  */
    MOV_L      ( REGOFF(MATRIX_INV, ECX), ECX ) /*  mat->inv     */
    MOV_L      ( REGOFF(V4F_START, ESI), EDX ) /*  in->start    */

    CMP_L      ( CONST(0), EBP )        /* count == 0 ?  nothing to do        */
    JE         ( LLBL (G3TNNR_end) )

    FEMMS

    MOVD       ( M(0), MM0 )            /*               | m0                 */
    PUNPCKLDQ  ( M(5), MM0 )            /* m5            | m0                 */

    MOVD       ( M(10), MM2 )           /*               | m10                */
    PUNPCKLDQ  ( MM2, MM2 )             /* m10           | m10                */

    /*
     * NOTE(review): the scale factor is folded into the matrix only when
     * lengths == 0, the path that recomputes 1/sqrt(t.t) and so cancels a
     * uniform scale again; the lengths[i] path uses the unscaled elements.
     * Confirm this polarity against the C reference implementation.
     */
    CMP_L      ( CONST(0), EDI )        /*  lengths == 0 ?                    */
    JNE        ( LLBL (G3TNNR_scale_end ) )

    MOVD       ( ARG_SCALE, MM7 )       /*               | scale              */
    PUNPCKLDQ  ( MM7, MM7 )             /* scale         | scale              */

    PFMUL      ( MM7, MM0 )             /* scale * m5    | scale * m0         */
    PFMUL      ( MM7, MM2 )             /* scale * m10   | scale * m10        */

ALIGNTEXT32
LLBL (G3TNNR_scale_end):
    CMP_L      ( CONST(0), EDI )        /* lengths == 0 ?                     */
    JE         ( LLBL (G3TNNR_norm) )   /* need to calculate lengths          */


ALIGNTEXT32
LLBL (G3TNNR_norm_w_lengths):           /* use precalculated lengths          */

    PREFETCHW  ( REGIND(EAX) )

    MOVQ       ( REGIND(EDX), MM6 )     /* x1            | x0                 */
    MOVD       ( REGOFF(8, EDX), MM7 )  /*               | x2                 */

    /* Load the factor for THIS normal at the top of the iteration.  Loading  */
    /* the next one at the bottom would read lengths[count], one element past */
    /* the end of the array, on the final pass.                               */
    MOVD       ( REGIND(EDI), MM3 )     /*               | lengths[i]         */

    PFMUL      ( MM0, MM6 )             /* x1*m5         | x0*m0              */
    ADD_L      ( STRIDE, EDX )          /* next normal                        */

    PREFETCH   ( REGIND(EDX) )

    PFMUL      ( MM2, MM7 )             /*               | x2*m10             */
    ADD_L      ( CONST(16), EAX )       /* next r (stores use -16/-8)         */

    PFMUL      ( MM3, MM7 )             /*               | x2 (normalized)    */
    PUNPCKLDQ  ( MM3, MM3 )             /* lengths[i]    | lengths[i]         */

    ADD_L      ( CONST(4), EDI )        /* next length                        */
    PFMUL      ( MM3, MM6 )             /* x1 (normalized) | x0 (normalized)  */

    /* Flags from this SUB_L feed the JNZ below; the stores in between do     */
    /* not modify them.                                                       */
    SUB_L      ( CONST(1), EBP )        /* decrement normal counter           */
    MOVQ       ( MM6, REGOFF(-16, EAX) ) /* write r0, r1                      */

    MOVD       ( MM7, REGOFF(-8, EAX) ) /* write r2                           */

    JNZ        ( LLBL (G3TNNR_norm_w_lengths) )
    JMP        ( LLBL (G3TNNR_exit_3dnow) )

ALIGNTEXT32
LLBL (G3TNNR_norm):                     /* need to calculate lengths          */

    PREFETCHW  ( REGIND(EAX) )

    MOVQ       ( REGIND(EDX), MM6 )     /* x1              | x0               */
    MOVD       ( REGOFF(8, EDX), MM7 )  /*                 | x2               */

    PFMUL      ( MM0, MM6 )             /* x1*m5           | x0*m0            */
    ADD_L      ( CONST(16), EAX )       /* next r                             */

    PFMUL      ( MM2, MM7 )             /*                 | x2*m10           */
    MOVQ       ( MM6, MM3 )             /* t1              | t0               */

    MOVQ       ( MM7, MM4 )             /*                 | t2               */
    PFMUL      ( MM6, MM3 )             /* t1*t1           | t0*t0            */


    PFMUL      ( MM7, MM4 )             /*                 | t2*t2            */
    PFACC      ( MM3, MM3 )             /* **not used**    | t0*t0+t1*t1      */

    PFADD      ( MM4, MM3 )             /*                 | t0*t0+t1*t1+t2*t2*/
    ADD_L      ( STRIDE, EDX )          /* next normal                        */

    PREFETCH   ( REGIND(EDX) )

    PFRSQRT    ( MM3, MM5 )             /* approx 1/sqrt (sum of squares)     */
    MOVQ       ( MM5, MM4 )             /* keep the first approximation       */

    PUNPCKLDQ  ( MM3, MM3 )             /* sum           | sum                */
    PFMUL      ( MM5, MM5 )             /* approximation squared              */

    PFRSQIT1   ( MM3, MM5 )             /* reciprocal sqrt refinement 1       */
    SUB_L      ( CONST(1), EBP )        /* decrement normal counter           */

    PFRCPIT2   ( MM4, MM5 )             /* refinement 2 -> 1/sqrt (sum)       */
    PFMUL      ( MM5, MM6 )             /* x1 (normalized) | x0 (normalized)  */

    MOVQ       ( MM6, REGOFF(-16, EAX) ) /* write r0, r1                      */
    PFMUL      ( MM5, MM7 )             /*                 | x2 (normalized)  */

    MOVD       ( MM7, REGOFF(-8, EAX) ) /* write r2                           */
    JNZ        ( LLBL (G3TNNR_norm) )


LLBL (G3TNNR_exit_3dnow):
    FEMMS

LLBL (G3TNNR_end):
    POP_L      ( EBP )
    POP_L      ( ESI )
    POP_L      ( EDI )
    RET
354
355
356
357
358
359
/*
 * _mesa_3dnow_transform_rescale_normals_no_rot
 *
 * For every input normal (x0, x1, x2) writes
 *     r0 = x0 * scale*m0,  r1 = x1 * scale*m5,  r2 = x2 * scale*m10
 * to the destination array (16 bytes apart), where m0/m5/m10 are read from
 * mat->inv.  The scale is folded into the three factors once, before the
 * loop.
 *
 * Stack arguments: ARG_MAT, ARG_SCALE, ARG_IN, ARG_DEST.
 * Registers: EAX = output pointer, EDX = input pointer, ECX = matrix,
 * ESI = input struct, EBP = remaining count.
 * EDI, ESI and EBP are saved and restored; EAX, ECX, EDX and MMX registers
 * are clobbered.  Nothing is processed when in->count is 0.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_3dnow_transform_rescale_normals_no_rot)
HIDDEN(_mesa_3dnow_transform_rescale_normals_no_rot)
GLNAME(_mesa_3dnow_transform_rescale_normals_no_rot):

#undef FRAME_OFFSET
#define FRAME_OFFSET 12

    PUSH_L     ( EDI )
    PUSH_L     ( ESI )
    PUSH_L     ( EBP )

    /* ARG_IN is fetched once into ESI and reused for count, start and STRIDE. */
    MOV_L      ( ARG_IN, ESI )
    MOV_L      ( ARG_DEST, EAX )
    MOV_L      ( REGOFF(V4F_COUNT, ESI), EBP ) /*  dest->count = in->count   */
    MOV_L      ( EBP, REGOFF(V4F_COUNT, EAX) )
    MOV_L      ( ARG_MAT, ECX )
    MOV_L      ( REGOFF(V4F_START, EAX), EAX ) /*  dest->start  */
    MOV_L      ( REGOFF(MATRIX_INV, ECX), ECX ) /*  mat->inv     */
    MOV_L      ( REGOFF(V4F_START, ESI), EDX ) /*  in->start    */

    CMP_L      ( CONST(0), EBP )        /* count == 0 ?  nothing to do        */
    JE         ( LLBL (G3TRNR_end) )

    FEMMS

    MOVD       ( ARG_SCALE, MM6 )       /*               | scale              */
    PUNPCKLDQ  ( MM6, MM6 )             /* scale         | scale              */

    MOVD       ( M(0), MM0 )            /*               | m0                 */
    PUNPCKLDQ  ( M(5), MM0 )            /* m5            | m0                 */

    PFMUL      ( MM6, MM0 )             /* scale*m5      | scale*m0           */
    MOVD       ( M(10), MM2 )           /*               | m10                */

    PFMUL      ( MM6, MM2 )             /*               | scale*m10          */

ALIGNTEXT32
LLBL (G3TRNR_rescale):

    PREFETCHW  ( REGIND(EAX) )

    MOVQ       ( REGIND(EDX), MM4 )     /* x1            | x0                 */
    MOVD       ( REGOFF(8, EDX), MM5 )  /*               | x2                 */

    PFMUL      ( MM0, MM4 )             /* x1*m5         | x0*m0              */
    ADD_L      ( STRIDE, EDX )          /* next normal                        */

    PREFETCH   ( REGIND(EDX) )

    PFMUL      ( MM2, MM5 )             /*               | x2*m10             */
    ADD_L      ( CONST(16), EAX )       /* next r (stores use -16/-8)         */

    /* Flags from this SUB_L feed the JNZ below; the stores do not modify them. */
    SUB_L      ( CONST(1), EBP )        /* decrement normal counter           */
    MOVQ       ( MM4, REGOFF(-16, EAX) ) /* write r0, r1                      */

    MOVD       ( MM5, REGOFF(-8, EAX) ) /* write r2                           */
    JNZ        ( LLBL (G3TRNR_rescale) ) /* cnt > 0 ? -> process next normal  */

    FEMMS

LLBL (G3TRNR_end):
    POP_L      ( EBP )
    POP_L      ( ESI )
    POP_L      ( EDI )
    RET
427
428
429
430
431
/*
 * _mesa_3dnow_transform_rescale_normals
 *
 * Multiplies the nine matrix elements m0-m2, m4-m6, m8-m10 of mat->inv by
 * the scale factor once, then for every input normal (x0, x1, x2) writes
 *     r0 = x0*m0 + x1*m1 + x2*m2
 *     r1 = x0*m4 + x1*m5 + x2*m6
 *     r2 = x0*m8 + x1*m9 + x2*m10      (m* already scaled)
 * to the destination array, 16 bytes apart.
 *
 * Stack arguments: ARG_MAT, ARG_SCALE, ARG_IN, ARG_DEST.
 * Registers: EAX = output pointer, EDX = input pointer, ECX = matrix,
 * ESI = input struct, EDI = remaining count.
 * EDI and ESI are saved and restored; EAX, ECX, EDX and MM0-MM7 are
 * clobbered.  Nothing is processed when in->count is 0.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_3dnow_transform_rescale_normals)
HIDDEN(_mesa_3dnow_transform_rescale_normals)
GLNAME(_mesa_3dnow_transform_rescale_normals):

#undef  FRAME_OFFSET
#define FRAME_OFFSET 8

    PUSH_L     ( EDI )
    PUSH_L     ( ESI )

    MOV_L      ( ARG_IN, ESI )
    MOV_L      ( ARG_DEST, EAX )
    MOV_L      ( ARG_MAT, ECX )
    MOV_L      ( REGOFF(V4F_COUNT, ESI), EDI ) /* EDI = in->count             */
    MOV_L      ( EDI, REGOFF(V4F_COUNT, EAX) ) /* dest->count = in->count     */
    MOV_L      ( REGOFF(V4F_START, EAX), EAX ) /* EAX = dest->start           */
    MOV_L      ( REGOFF(V4F_START, ESI), EDX ) /* EDX = in->start             */
    MOV_L      ( REGOFF(MATRIX_INV, ECX), ECX ) /* ECX = mat->inv             */

    CMP_L      ( CONST(0), EDI )        /* empty input ?                      */
    JE         ( LLBL (G3TR_done) )

    FEMMS

    /* Broadcast the scale, gather the matrix, then scale every element.      */
    MOVD       ( ARG_SCALE, MM0 )       /*               | scale              */
    PUNPCKLDQ  ( MM0, MM0 )             /* scale         | scale              */

    MOVQ       ( M(0), MM3 )            /* m1            | m0                 */
    MOVQ       ( M(4), MM4 )            /* m5            | m4                 */
    MOVD       ( M(2), MM5 )            /*               | m2                 */
    PUNPCKLDQ  ( M(6), MM5 )            /* m6            | m2                 */
    MOVQ       ( M(8), MM6 )            /* m9            | m8                 */
    MOVD       ( M(10), MM7 )           /*               | m10                */

    PFMUL      ( MM0, MM3 )             /* scale*m1      | scale*m0           */
    PFMUL      ( MM0, MM4 )             /* scale*m5      | scale*m4           */
    PFMUL      ( MM0, MM5 )             /* scale*m6      | scale*m2           */
    PFMUL      ( MM0, MM6 )             /* scale*m9      | scale*m8           */
    PFMUL      ( MM0, MM7 )             /*               | scale*m10          */

ALIGNTEXT32
LLBL (G3TR_next):

    PREFETCHW  ( REGIND(EAX) )

    /* --- r0 and r1 --- */
    MOVQ       ( REGIND(EDX), MM0 )     /* x1            | x0                 */
    MOVD       ( REGOFF(8, EDX), MM2 )  /*               | x2                 */
    MOVQ       ( MM0, MM1 )             /* x1            | x0                 */
    PUNPCKLDQ  ( MM2, MM2 )             /* x2            | x2                 */

    PFMUL      ( MM3, MM0 )             /* x1*m1         | x0*m0              */
    PFMUL      ( MM4, MM1 )             /* x1*m5         | x0*m4              */
    PFACC      ( MM1, MM0 )             /* x0*m4+x1*m5   | x0*m0+x1*m1        */
    PFMUL      ( MM5, MM2 )             /* x2*m6         | x2*m2              */
    PFADD      ( MM2, MM0 )             /* r1            | r0                 */

    /* --- r2: fetch the normal again before the first store --- */
    MOVQ       ( REGIND(EDX), MM1 )     /* x1            | x0                 */
    MOVD       ( REGOFF(8, EDX), MM2 )  /*               | x2                 */
    ADD_L      ( STRIDE, EDX )          /* step to the next normal            */

    PREFETCH   ( REGIND(EDX) )

    MOVQ       ( MM0, REGIND(EAX) )     /* store r0, r1                       */

    PFMUL      ( MM6, MM1 )             /* x1*m9         | x0*m8              */
    PFMUL      ( MM7, MM2 )             /*               | x2*m10             */
    PFACC      ( MM1, MM1 )             /* *not used*    | x0*m8+x1*m9        */
    PFADD      ( MM2, MM1 )             /* *not used*    | r2                 */
    MOVD       ( MM1, REGOFF(8, EAX) )  /* store r2                           */

    ADD_L      ( CONST(16), EAX )       /* step to the next output slot       */
    SUB_L      ( CONST(1), EDI )        /* one normal fewer to go             */
    JNZ        ( LLBL (G3TR_next) )

    FEMMS

LLBL (G3TR_done):
    POP_L       ( ESI )
    POP_L       ( EDI )
    RET
523
524
525
526
527
528
529
/*
 * _mesa_3dnow_transform_normals_no_rot
 *
 * For every input normal (x0, x1, x2) writes
 *     r0 = x0*m0,  r1 = x1*m5,  r2 = x2*m10
 * to the destination array (16 bytes apart); m0, m5 and m10 are read from
 * mat->inv.  No scale factor is applied.
 *
 * Stack arguments: ARG_MAT, ARG_IN, ARG_DEST.
 * Registers: EAX = output pointer, EDX = input pointer, ECX = matrix,
 * ESI = input struct, EDI = remaining count.
 * EDI and ESI are saved and restored; EAX, ECX, EDX and MMX registers are
 * clobbered.  Nothing is processed when in->count is 0.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_3dnow_transform_normals_no_rot)
HIDDEN(_mesa_3dnow_transform_normals_no_rot)
GLNAME(_mesa_3dnow_transform_normals_no_rot):

#undef  FRAME_OFFSET
#define FRAME_OFFSET 8

    PUSH_L     ( EDI )
    PUSH_L     ( ESI )

    MOV_L      ( ARG_IN, ESI )
    MOV_L      ( ARG_DEST, EAX )
    MOV_L      ( ARG_MAT, ECX )
    MOV_L      ( REGOFF(V4F_COUNT, ESI), EDI ) /* EDI = in->count             */
    MOV_L      ( EDI, REGOFF(V4F_COUNT, EAX) ) /* dest->count = in->count     */
    MOV_L      ( REGOFF(V4F_START, EAX), EAX ) /* EAX = dest->start           */
    MOV_L      ( REGOFF(V4F_START, ESI), EDX ) /* EDX = in->start             */
    MOV_L      ( REGOFF(MATRIX_INV, ECX), ECX ) /* ECX = mat->inv             */

    CMP_L      ( CONST(0), EDI )        /* empty input ?                      */
    JE         ( LLBL (G3TNR_done) )

    FEMMS

    /* Per-component factors, loaded once. */
    MOVD       ( M(0), MM0 )            /*               | m0                 */
    PUNPCKLDQ  ( M(5), MM0 )            /* m5            | m0                 */

    MOVD       ( M(10), MM2 )           /*               | m10                */
    PUNPCKLDQ  ( MM2, MM2 )             /* m10           | m10                */

ALIGNTEXT32
LLBL (G3TNR_next):

    PREFETCHW  ( REGIND(EAX) )

    MOVQ       ( REGIND(EDX), MM4 )     /* x1            | x0                 */
    MOVD       ( REGOFF(8, EDX), MM5 )  /*               | x2                 */
    ADD_L      ( STRIDE, EDX )          /* step to the next normal            */

    PREFETCH   ( REGIND(EDX) )

    PFMUL      ( MM0, MM4 )             /* x1*m5         | x0*m0              */
    PFMUL      ( MM2, MM5 )             /* x2*m10        | x2*m10             */

    MOVQ       ( MM4, REGIND(EAX) )     /* store r0, r1                       */
    MOVD       ( MM5, REGOFF(8, EAX) )  /* store r2                           */

    ADD_L      ( CONST(16), EAX )       /* step to the next output slot       */
    SUB_L      ( CONST(1), EDI )        /* one normal fewer to go             */
    JNZ        ( LLBL (G3TNR_next) )

    FEMMS

LLBL (G3TNR_done):
    POP_L       ( ESI )
    POP_L       ( EDI )
    RET
589
590
591
592
593
594
595
596
/*
 * _mesa_3dnow_transform_normals
 *
 * For every input normal (x0, x1, x2) writes
 *     r0 = x0*m0 + x1*m1 + x2*m2
 *     r1 = x0*m4 + x1*m5 + x2*m6
 *     r2 = x0*m8 + x1*m9 + x2*m10
 * to the destination array (16 bytes apart); the m* values are read from
 * mat->inv.  No scale factor and no normalization are applied.
 *
 * Stack arguments: ARG_MAT, ARG_IN, ARG_DEST.
 * Registers: EAX = output pointer, EDX = input pointer, ECX = matrix,
 * ESI = input struct, EDI = remaining count.
 * EDI and ESI are saved and restored; EAX, ECX, EDX and MM0-MM7 are
 * clobbered.  Nothing is processed when in->count is 0.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_3dnow_transform_normals)
HIDDEN(_mesa_3dnow_transform_normals)
GLNAME(_mesa_3dnow_transform_normals):

#undef  FRAME_OFFSET
#define FRAME_OFFSET 8

    PUSH_L     ( EDI )
    PUSH_L     ( ESI )

    MOV_L      ( ARG_IN, ESI )
    MOV_L      ( ARG_DEST, EAX )
    MOV_L      ( ARG_MAT, ECX )
    MOV_L      ( REGOFF(V4F_COUNT, ESI), EDI ) /* EDI = in->count             */
    MOV_L      ( EDI, REGOFF(V4F_COUNT, EAX) ) /* dest->count = in->count     */
    MOV_L      ( REGOFF(V4F_START, EAX), EAX ) /* EAX = dest->start           */
    MOV_L      ( REGOFF(V4F_START, ESI), EDX ) /* EDX = in->start             */
    MOV_L      ( REGOFF(MATRIX_INV, ECX), ECX ) /* ECX = mat->inv             */

    CMP_L      ( CONST(0), EDI )        /* empty input ?                      */
    JE         ( LLBL (G3T_done) )

    FEMMS

    /* Matrix elements stay resident in MM3-MM7 for the whole loop. */
    MOVQ       ( M(0), MM3 )            /* m1            | m0                 */
    MOVQ       ( M(4), MM4 )            /* m5            | m4                 */

    MOVD       ( M(2), MM5 )            /*               | m2                 */
    PUNPCKLDQ  ( M(6), MM5 )            /* m6            | m2                 */

    MOVQ       ( M(8), MM6 )            /* m9            | m8                 */
    MOVD       ( M(10), MM7 )           /*               | m10                */

ALIGNTEXT32
LLBL (G3T_next):

    PREFETCHW  ( REGIND(EAX) )

    /* --- r0 and r1 --- */
    MOVQ       ( REGIND(EDX), MM0 )     /* x1            | x0                 */
    MOVD       ( REGOFF(8, EDX), MM2 )  /*               | x2                 */
    MOVQ       ( MM0, MM1 )             /* x1            | x0                 */
    PUNPCKLDQ  ( MM2, MM2 )             /* x2            | x2                 */

    PFMUL      ( MM3, MM0 )             /* x1*m1         | x0*m0              */
    PFMUL      ( MM4, MM1 )             /* x1*m5         | x0*m4              */
    PFACC      ( MM1, MM0 )             /* x0*m4+x1*m5   | x0*m0+x1*m1        */
    PFMUL      ( MM5, MM2 )             /* x2*m6         | x2*m2              */
    PFADD      ( MM2, MM0 )             /* r1            | r0                 */

    /* --- r2: x0/x1 are fetched again before r0/r1 are stored, x2 after --- */
    MOVQ       ( REGIND(EDX), MM1 )     /* x1            | x0                 */
    MOVQ       ( MM0, REGIND(EAX) )     /* store r0, r1                       */
    MOVD       ( REGOFF(8, EDX), MM2 )  /*               | x2                 */
    ADD_L      ( STRIDE, EDX )          /* step to the next normal            */

    PREFETCH   ( REGIND(EDX) )

    PFMUL      ( MM6, MM1 )             /* x1*m9         | x0*m8              */
    PFMUL      ( MM7, MM2 )             /*               | x2*m10             */
    PFACC      ( MM1, MM1 )             /* *not used*    | x0*m8+x1*m9        */
    PFADD      ( MM2, MM1 )             /* *not used*    | r2                 */

    MOVD       ( MM1, REGOFF(8, EAX) )  /* store r2                           */

    ADD_L      ( CONST(16), EAX )       /* step to the next output slot       */
    SUB_L      ( CONST(1), EDI )        /* one normal fewer to go             */
    JNZ        ( LLBL (G3T_next) )

    FEMMS

LLBL (G3T_done):
    POP_L  ( ESI )
    POP_L  ( EDI )
    RET
676
677
678
679
680
681
/*
 * _mesa_3dnow_normalize_normals
 *
 * Copies every input normal (x0, x1, x2) to the destination array (16 bytes
 * apart) multiplied by a per-normal factor: lengths[i] when lengths != 0,
 * otherwise a computed 1/sqrt(x0*x0 + x1*x1 + x2*x2).  No matrix is used.
 *
 * Stack arguments: ARG_IN, ARG_LENGTHS, ARG_DEST.
 * Registers: EAX = output pointer, ECX = input pointer, EDX = lengths,
 * ESI = input struct, EBP = remaining count.
 * EDI, ESI and EBP are saved and restored; EAX, ECX, EDX and MMX registers
 * are clobbered.  Nothing is processed when in->count is 0.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_3dnow_normalize_normals)
HIDDEN(_mesa_3dnow_normalize_normals)
GLNAME(_mesa_3dnow_normalize_normals):

#undef  FRAME_OFFSET
#define FRAME_OFFSET 12

    PUSH_L     ( EDI )
    PUSH_L     ( ESI )
    PUSH_L     ( EBP )

    MOV_L      ( ARG_IN, ESI )
    MOV_L      ( ARG_DEST, EAX )
    MOV_L      ( REGOFF(V4F_COUNT, ESI), EBP ) /*  dest->count = in->count   */
    MOV_L      ( EBP, REGOFF(V4F_COUNT, EAX) )
    MOV_L      ( REGOFF(V4F_START, EAX), EAX ) /*  dest->start  */
    MOV_L      ( REGOFF(V4F_START, ESI), ECX ) /*  in->start    */
    MOV_L      ( ARG_LENGTHS, EDX )

    CMP_L      ( CONST(0), EBP )        /* count == 0 ?  nothing to do        */
    JE         ( LLBL (G3N_end) )

    FEMMS

    CMP_L      ( CONST(0), EDX )        /* lengths == 0 ?                     */
    JE         ( LLBL (G3N_norm2) )     /* calculate lengths                  */

ALIGNTEXT32
LLBL (G3N_norm1):                       /* use precalculated lengths          */

    /* The output line is about to be written, so ask for it with the write  */
    /* hint like every other loop in this file does.                         */
    PREFETCHW  ( REGIND(EAX) )

    MOVQ       ( REGIND(ECX), MM0 )     /* x1              | x0               */
    MOVD       ( REGOFF(8, ECX), MM1 )  /*                 | x2               */

    MOVD       ( REGIND(EDX), MM3 )     /*                 | lengths[i]       */
    PFMUL      ( MM3, MM1 )             /*                 | x2 (normalized)  */

    PUNPCKLDQ  ( MM3, MM3 )             /* lengths[i]      | lengths[i]       */
    ADD_L      ( STRIDE, ECX )          /* next normal                        */

    PREFETCH   ( REGIND(ECX) )

    PFMUL      ( MM3, MM0 )             /* x1 (normalized) | x0 (normalized)  */
    MOVQ       ( MM0, REGIND(EAX) )     /* write new x0, x1                   */

    MOVD       ( MM1, REGOFF(8, EAX) )  /* write new x2                       */
    ADD_L      ( CONST(16), EAX )       /* next r                             */

    ADD_L      ( CONST(4), EDX )        /* next length                        */
    SUB_L      ( CONST(1), EBP )        /* decrement normal counter           */

    JNZ        ( LLBL (G3N_norm1) )

    JMP        ( LLBL (G3N_end1) )

ALIGNTEXT32
LLBL (G3N_norm2):                       /* need to calculate lengths          */

    PREFETCHW  ( REGIND(EAX) )

    MOVQ       ( REGIND(ECX), MM0 )     /* x1              | x0               */
    MOVD       ( REGOFF(8, ECX), MM1 )  /*                 | x2               */

    MOVQ       ( MM0, MM3 )             /* x1              | x0               */
    ADD_L      ( STRIDE, ECX )          /* next normal                        */

    /* Prefetch the NEXT normal; prefetching the one that is loaded right    */
    /* away gains nothing.                                                   */
    PREFETCH   ( REGIND(ECX) )

    PFMUL      ( MM0, MM3 )             /* x1*x1           | x0*x0            */
    MOVQ       ( MM1, MM4 )             /*                 | x2               */

    ADD_L      ( CONST(16), EAX )       /* next r (stores use -16/-8)         */
    PFMUL      ( MM1, MM4 )             /*                 | x2*x2            */

    PFADD      ( MM4, MM3 )             /* x1*x1           | x0*x0+x2*x2      */
    PFACC      ( MM3, MM3 )             /* sum of squares in both halves      */

    PFRSQRT    ( MM3, MM5 )             /* approx 1/sqrt (sum of squares)     */
    MOVQ       ( MM5, MM4 )             /* keep the first approximation       */

    PUNPCKLDQ  ( MM3, MM3 )
    PFMUL      ( MM5, MM5 )             /* approximation squared              */

    PFRSQIT1   ( MM3, MM5 )             /* reciprocal sqrt refinement 1       */
    /* Flags from this SUB_L feed the JNZ at the end of the loop; the        */
    /* MMX/3DNow! instructions in between do not modify them.                */
    SUB_L      ( CONST(1), EBP )        /* decrement normal counter           */

    PFRCPIT2   ( MM4, MM5 )             /* refinement 2 -> 1/sqrt (sum)       */

    PFMUL      ( MM5, MM0 )             /* x1 (normalized) | x0 (normalized)  */
    MOVQ       ( MM0, REGOFF(-16, EAX) ) /* write new x0, x1                  */

    PFMUL      ( MM5, MM1 )             /*                 | x2 (normalized)  */
    MOVD       ( MM1, REGOFF(-8, EAX) ) /* write new x2                       */

    JNZ        ( LLBL (G3N_norm2) )

LLBL (G3N_end1):
    FEMMS

LLBL (G3N_end):
    POP_L      ( EBP )
    POP_L      ( ESI )
    POP_L      ( EDI )
    RET
788
789
790
791
792
793
/*
 * _mesa_3dnow_rescale_normals
 *
 * Copies every input normal (x0, x1, x2) to the destination array (16 bytes
 * apart) with each component multiplied by the scale factor.  No matrix is
 * used.
 *
 * Stack arguments: ARG_SCALE, ARG_IN, ARG_DEST.
 * Registers: EAX = output pointer, ECX = input pointer, ESI = input struct,
 * EDX = remaining count.
 * EDI and ESI are saved and restored; EAX, ECX, EDX and MM0-MM2 are
 * clobbered.  Nothing is processed when in->count is 0.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_3dnow_rescale_normals)
HIDDEN(_mesa_3dnow_rescale_normals)
GLNAME(_mesa_3dnow_rescale_normals):

#undef  FRAME_OFFSET
#define FRAME_OFFSET 8
    PUSH_L     ( EDI )
    PUSH_L     ( ESI )

    MOV_L      ( ARG_IN, ESI )
    MOV_L      ( ARG_DEST, EAX )
    MOV_L      ( REGOFF(V4F_COUNT, ESI), EDX ) /* EDX = in->count             */
    MOV_L      ( EDX, REGOFF(V4F_COUNT, EAX) ) /* dest->count = in->count     */
    MOV_L      ( REGOFF(V4F_START, EAX), EAX ) /* EAX = dest->start           */
    MOV_L      ( REGOFF(V4F_START, ESI), ECX ) /* ECX = in->start             */

    CMP_L      ( CONST(0), EDX )        /* empty input ?                      */
    JE         ( LLBL (G3R_done) )

    FEMMS

    MOVD       ( ARG_SCALE, MM0 )       /*               | scale              */
    PUNPCKLDQ  ( MM0, MM0 )             /* scale         | scale              */

ALIGNTEXT32
LLBL (G3R_next):

    PREFETCHW  ( REGIND(EAX) )

    MOVQ       ( REGIND(ECX), MM1 )     /* x1            | x0                 */
    MOVD       ( REGOFF(8, ECX), MM2 )  /*               | x2                 */
    ADD_L      ( STRIDE, ECX )          /* step to the next normal            */

    PREFETCH   ( REGIND(ECX) )

    PFMUL      ( MM0, MM1 )             /* x1*scale      | x0*scale           */
    PFMUL      ( MM0, MM2 )             /*               | x2*scale           */

    MOVQ       ( MM1, REGIND(EAX) )     /* store r0, r1                       */
    MOVD       ( MM2, REGOFF(8, EAX) )  /* store r2                           */

    ADD_L      ( CONST(16), EAX )       /* step to the next output slot       */
    SUB_L      ( CONST(1), EDX )        /* one normal fewer to go             */
    JNZ        ( LLBL (G3R_next) )

    FEMMS

LLBL (G3R_done):
    POP_L      ( ESI )
    POP_L      ( EDI )
    RET
847
848#endif
849
850#if defined (__ELF__) && defined (__linux__)
851	.section .note.GNU-stack,"",%progbits
852#endif
853