1
2#include "sparc_matrix.h"
3
4	.register %g2, #scratch
5	.register %g3, #scratch
6
7	.text
8
/* STACK_VAR_OFF is the displacement from %sp at which the routines below
 * place a small scratch slot (after first lowering %sp by 16) in order to
 * move values from integer registers into FP registers.
 * NOTE(review): presumably 16 register save slots of 4 (32-bit) or 8
 * (64-bit) bytes, plus the 2047-byte stack bias on 64-bit -- confirm
 * against the SPARC ABI.
 */
#ifndef __arch64__
#define STACK_VAR_OFF	(4 * 16)
#else
#define STACK_VAR_OFF	(2047 + (8 * 16))
#endif

	/* 1/sqrt(x) is computed with plain fsqrts + fdivs: a Newton-Raphson
	 * approximation turned out to be both slower and less accurate.
	 *
	 * ONE_DOT_ZERO is the single-precision bit pattern of 1.0.  Its low
	 * ten bits are zero, so a lone "sethi %hi()" materialises it.
	 */
#define ONE_DOT_ZERO	0x3f800000
19
20	.globl	_mesa_sparc_transform_normalize_normals
/*
 * _mesa_sparc_transform_normalize_normals
 *
 * In:   %o0 = mat, %o1 = scale (float bit pattern), %o2 = in,
 *       %o3 = lengths (may be NULL), %o4 = dest
 * Out:  dest->count = in->count; three floats per normal are stored at
 *       16-byte intervals starting at dest->start.
 *
 * Every input normal (ux,uy,uz) is transformed with elements
 * 0,1,2,4,5,6,8,9,10 loaded from mat->inv:
 *	tx = ux*m0 + uy*m1 + uz*m2
 *	ty = ux*m4 + uy*m5 + uz*m6
 *	tz = ux*m8 + uy*m9 + uz*m10
 * lengths == NULL: the result is multiplied by
 *                  1.0 / sqrt(tx*tx + ty*ty + tz*tz).  There is no guard
 *                  for a zero sum of squares.
 * lengths != NULL: the matrix is multiplied by 'scale' once up front and
 *                  every result is multiplied by lengths[i].
 *
 * Leaf routine (returns with retl).  Modifies %g1-%g3, %o0, %o3-%o5, the
 * integer condition codes and FP registers; %sp is lowered by 16 only
 * while the scratch slot is in use.
 *
 * NOTE(review): 'scale' is read from %o1; confirm that this is where the
 * 64-bit (__arch64__) calling convention passes a float argument.
 */
_mesa_sparc_transform_normalize_normals:
	/* Move 1.0f and 'scale' from integer to FP registers by way of a
	 * temporary stack slot.
	 */
	sub	%sp, 16, %sp
	sethi	%hi(ONE_DOT_ZERO), %g2
	st	%o1, [%sp + STACK_VAR_OFF+0x4]
	st	%g2, [%sp + STACK_VAR_OFF+0x0]
	ld	[%sp + STACK_VAR_OFF+0x4], %f15	! f15 = scale
	ld	[%sp + STACK_VAR_OFF+0x0], %f12	! f12 = 1.0f
	add	%sp, 16, %sp

	ld	[%o2 + V4F_COUNT], %g1		! g1 = in->count
	ld	[%o2 + V4F_STRIDE], %g2		! g2 = in->stride
	LDPTR	[%o2 + V4F_START], %o5		! o5 = in->start   (source cursor)
	LDPTR	[%o4 + V4F_START], %g3		! g3 = dest->start (output cursor)
	LDPTR	[%o0 + MATRIX_INV], %o0		! o0 = mat->inv

	LDMATRIX_0_1_2_4_5_6_8_9_10(%o0)

	st	%g1, [%o4 + V4F_COUNT]		! dest->count = in->count

	cmp	%g1, 1
	bl	.Ltnn_done			! count < 1: nothing to transform
	 tst	%o3				! (delay slot) lengths == NULL ?
	bne	.Ltnn_prescale
	 mov	0, %o4				! (delay slot) i = 0, dest no longer needed

.Ltnn_norm_loop:	/* lengths == NULL: normalise every result explicitly */
	ld	[%o5 + 0x00], %f0		! f0 = ux
	ld	[%o5 + 0x04], %f1		! f1 = uy
	ld	[%o5 + 0x08], %f2		! f2 = uz
	add	%o5, %g2, %o5			! source cursor += stride
	add	%o4, 1, %o4			! ++i

	/* Products and partial sums are interleaved; f0 and f4 are reused
	 * as temporaries once ux and uy*m1 have been consumed.
	 */
	fmuls	%f0, M0, %f3			! f3  = ux*m0
	fmuls	%f1, M1, %f4			! f4  = uy*m1
	fmuls	%f0, M4, %f5			! f5  = ux*m4
	fmuls	%f1, M5, %f6			! f6  = uy*m5
	fmuls	%f0, M8, %f7			! f7  = ux*m8
	fmuls	%f1, M9, %f8			! f8  = uy*m9
	fadds	%f3, %f4, %f3			! f3  = ux*m0 + uy*m1
	fmuls	%f2, M2, %f10			! f10 = uz*m2
	fmuls	%f2, M6, %f0			! f0  = uz*m6
	fadds	%f5, %f6, %f5			! f5  = ux*m4 + uy*m5
	fmuls	%f2, M10, %f4			! f4  = uz*m10
	fadds	%f7, %f8, %f7			! f7  = ux*m8 + uy*m9
	fadds	%f3, %f10, %f3			! f3  = tx
	fadds	%f5, %f0, %f5			! f5  = ty
	fadds	%f7, %f4, %f7			! f7  = tz

	/* f6 = tx*tx + ty*ty + tz*tz */
	fmuls	%f3, %f3, %f6
	fmuls	%f5, %f5, %f8
	fmuls	%f7, %f7, %f10
	fadds	%f6, %f8, %f6
	fadds	%f6, %f10, %f6

	/* f6 = 1.0 / sqrt(f6) */
	fsqrts	%f6, %f6
	fdivs	%f12, %f6, %f6

	fmuls	%f3, %f6, %f3
	st	%f3, [%g3 + 0x00]		! out[0] = tx / |t|
	fmuls	%f5, %f6, %f5
	st	%f5, [%g3 + 0x04]		! out[1] = ty / |t|
	fmuls	%f7, %f6, %f7
	st	%f7, [%g3 + 0x08]		! out[2] = tz / |t|

	cmp	%o4, %g1
	bl	.Ltnn_norm_loop			! loop while i < count
	 add	%g3, 0x10, %g3			! (delay slot) output cursor += 16

	ba	.Ltnn_done
	 nop

.Ltnn_prescale:	/* lengths != NULL: fold 'scale' into the matrix once */
	fmuls	M0, %f15, M0
	fmuls	M1, %f15, M1
	fmuls	M2, %f15, M2
	fmuls	M4, %f15, M4
	fmuls	M5, %f15, M5
	fmuls	M6, %f15, M6
	fmuls	M8, %f15, M8
	fmuls	M9, %f15, M9
	fmuls	M10, %f15, M10

.Ltnn_len_loop:
	ld	[%o5 + 0x00], %f0		! f0 = ux
	ld	[%o5 + 0x04], %f1		! f1 = uy
	ld	[%o5 + 0x08], %f2		! f2 = uz
	add	%o5, %g2, %o5			! source cursor += stride
	add	%o4, 1, %o4			! ++i

	fmuls	%f0, M0, %f3			! f3  = ux*m0
	fmuls	%f1, M1, %f4			! f4  = uy*m1
	fmuls	%f0, M4, %f5			! f5  = ux*m4
	fmuls	%f1, M5, %f6			! f6  = uy*m5
	fmuls	%f0, M8, %f7			! f7  = ux*m8
	fmuls	%f1, M9, %f8			! f8  = uy*m9
	fadds	%f3, %f4, %f3			! f3  = ux*m0 + uy*m1
	fmuls	%f2, M2, %f10			! f10 = uz*m2
	fmuls	%f2, M6, %f0			! f0  = uz*m6
	fadds	%f5, %f6, %f5			! f5  = ux*m4 + uy*m5
	fmuls	%f2, M10, %f4			! f4  = uz*m10
	fadds	%f7, %f8, %f7			! f7  = ux*m8 + uy*m9
	fadds	%f3, %f10, %f3			! f3  = tx
	ld	[%o3], %f13			! f13 = *lengths
	fadds	%f5, %f0, %f5			! f5  = ty
	add	%o3, 4, %o3			! ++lengths
	fadds	%f7, %f4, %f7			! f7  = tz

	fmuls	%f3, %f13, %f3
	st	%f3, [%g3 + 0x00]		! out[0] = tx * lengths[i]
	fmuls	%f5, %f13, %f5
	st	%f5, [%g3 + 0x04]		! out[1] = ty * lengths[i]
	fmuls	%f7, %f13, %f7
	st	%f7, [%g3 + 0x08]		! out[2] = tz * lengths[i]

	cmp	%o4, %g1
	bl	.Ltnn_len_loop			! loop while i < count
	 add	%g3, 0x10, %g3			! (delay slot) output cursor += 16

.Ltnn_done:
	retl
	 nop
158
159	.globl	_mesa_sparc_transform_normalize_normals_no_rot
/*
 * _mesa_sparc_transform_normalize_normals_no_rot
 *
 * In:   %o0 = mat, %o1 = scale (float bit pattern), %o2 = in,
 *       %o3 = lengths (may be NULL), %o4 = dest
 * Out:  dest->count = in->count; three floats per normal are stored at
 *       16-byte intervals starting at dest->start.
 *
 * Uses only elements 0, 5 and 10 loaded from mat->inv:
 *	tx = ux*m0,  ty = uy*m5,  tz = uz*m10
 * lengths == NULL: the result is multiplied by
 *                  1.0 / sqrt(tx*tx + ty*ty + tz*tz).  There is no guard
 *                  for a zero sum of squares.
 * lengths != NULL: the three elements are multiplied by 'scale' once up
 *                  front and every result is multiplied by lengths[i].
 *
 * Leaf routine (returns with retl).  Modifies %g1-%g3, %o0, %o3-%o5, the
 * integer condition codes and FP registers; %sp is lowered by 16 only
 * while the scratch slot is in use.
 *
 * NOTE(review): 'scale' is read from %o1; confirm that this is where the
 * 64-bit (__arch64__) calling convention passes a float argument.
 */
_mesa_sparc_transform_normalize_normals_no_rot:
	/* Move 1.0f and 'scale' from integer to FP registers by way of a
	 * temporary stack slot.
	 */
	sub	%sp, 16, %sp
	sethi	%hi(ONE_DOT_ZERO), %g2
	st	%o1, [%sp + STACK_VAR_OFF+0x4]
	st	%g2, [%sp + STACK_VAR_OFF+0x0]
	ld	[%sp + STACK_VAR_OFF+0x4], %f15	! f15 = scale
	ld	[%sp + STACK_VAR_OFF+0x0], %f12	! f12 = 1.0f
	add	%sp, 16, %sp

	ld	[%o2 + V4F_COUNT], %g1		! g1 = in->count
	ld	[%o2 + V4F_STRIDE], %g2		! g2 = in->stride
	LDPTR	[%o2 + V4F_START], %o5		! o5 = in->start   (source cursor)
	LDPTR	[%o4 + V4F_START], %g3		! g3 = dest->start (output cursor)
	LDPTR	[%o0 + MATRIX_INV], %o0		! o0 = mat->inv

	LDMATRIX_0_5_10(%o0)

	st	%g1, [%o4 + V4F_COUNT]		! dest->count = in->count

	cmp	%g1, 1
	bl	.Ltnnr_done			! count < 1: nothing to transform
	 tst	%o3				! (delay slot) lengths == NULL ?
	bne	.Ltnnr_prescale
	 mov	0, %o4				! (delay slot) i = 0, dest no longer needed

.Ltnnr_norm_loop:	/* lengths == NULL: normalise every result explicitly */
	ld	[%o5 + 0x00], %f0		! f0 = ux
	ld	[%o5 + 0x04], %f1		! f1 = uy
	ld	[%o5 + 0x08], %f2		! f2 = uz
	add	%o5, %g2, %o5			! source cursor += stride
	add	%o4, 1, %o4			! ++i

	fmuls	%f0, M0, %f3			! f3 = tx = ux*m0
	fmuls	%f1, M5, %f5			! f5 = ty = uy*m5
	fmuls	%f2, M10, %f7			! f7 = tz = uz*m10

	/* f6 = tx*tx + ty*ty + tz*tz */
	fmuls	%f3, %f3, %f6
	fmuls	%f5, %f5, %f8
	fmuls	%f7, %f7, %f10
	fadds	%f6, %f8, %f6
	fadds	%f6, %f10, %f6

	/* f6 = 1.0 / sqrt(f6) */
	fsqrts	%f6, %f6
	fdivs	%f12, %f6, %f6

	fmuls	%f3, %f6, %f3
	st	%f3, [%g3 + 0x00]		! out[0] = tx / |t|
	fmuls	%f5, %f6, %f5
	st	%f5, [%g3 + 0x04]		! out[1] = ty / |t|
	fmuls	%f7, %f6, %f7
	st	%f7, [%g3 + 0x08]		! out[2] = tz / |t|

	cmp	%o4, %g1
	bl	.Ltnnr_norm_loop		! loop while i < count
	 add	%g3, 0x10, %g3			! (delay slot) output cursor += 16

	ba	.Ltnnr_done
	 nop

.Ltnnr_prescale:	/* lengths != NULL: fold 'scale' into the matrix once */
	fmuls	M0, %f15, M0
	fmuls	M5, %f15, M5
	fmuls	M10, %f15, M10

.Ltnnr_len_loop:
	ld	[%o5 + 0x00], %f0		! f0 = ux
	ld	[%o5 + 0x04], %f1		! f1 = uy
	ld	[%o5 + 0x08], %f2		! f2 = uz
	add	%o5, %g2, %o5			! source cursor += stride
	add	%o4, 1, %o4			! ++i

	fmuls	%f0, M0, %f3			! f3  = tx = ux*m0
	ld	[%o3], %f13			! f13 = *lengths
	fmuls	%f1, M5, %f5			! f5  = ty = uy*m5
	add	%o3, 4, %o3			! ++lengths
	fmuls	%f2, M10, %f7			! f7  = tz = uz*m10

	fmuls	%f3, %f13, %f3
	st	%f3, [%g3 + 0x00]		! out[0] = tx * lengths[i]
	fmuls	%f5, %f13, %f5
	st	%f5, [%g3 + 0x04]		! out[1] = ty * lengths[i]
	fmuls	%f7, %f13, %f7
	st	%f7, [%g3 + 0x08]		! out[2] = tz * lengths[i]

	cmp	%o4, %g1
	bl	.Ltnnr_len_loop			! loop while i < count
	 add	%g3, 0x10, %g3			! (delay slot) output cursor += 16

.Ltnnr_done:
	retl
	 nop
267
268	.globl	_mesa_sparc_transform_rescale_normals_no_rot
/*
 * _mesa_sparc_transform_rescale_normals_no_rot
 *
 * In:   %o0 = mat, %o1 = scale (float bit pattern), %o2 = in,
 *       %o3 = lengths (not used here), %o4 = dest
 * Out:  dest->count = in->count; three floats per normal are stored at
 *       16-byte intervals starting at dest->start.
 *
 * Elements 0, 5 and 10 loaded from mat->inv are multiplied by 'scale'
 * once, then every normal is written as
 *	(ux*m0, uy*m5, uz*m10).
 *
 * Leaf routine (returns with retl).  Modifies %g1-%g3, %o0, %o4, %o5, the
 * integer condition codes and FP registers; %sp is lowered by 16 only
 * while the scratch slot is in use.
 *
 * NOTE(review): 'scale' is read from %o1; confirm that this is where the
 * 64-bit (__arch64__) calling convention passes a float argument.
 */
_mesa_sparc_transform_rescale_normals_no_rot:
	/* Move 'scale' from an integer to an FP register through a
	 * temporary stack slot.
	 */
	sub	%sp, 16, %sp
	st	%o1, [%sp + STACK_VAR_OFF+0x0]
	ld	[%sp + STACK_VAR_OFF+0x0], %f15	! f15 = scale
	add	%sp, 16, %sp

	ld	[%o2 + V4F_COUNT], %g1		! g1 = in->count
	ld	[%o2 + V4F_STRIDE], %g2		! g2 = in->stride
	LDPTR	[%o2 + V4F_START], %o5		! o5 = in->start   (source cursor)
	LDPTR	[%o4 + V4F_START], %g3		! g3 = dest->start (output cursor)
	LDPTR	[%o0 + MATRIX_INV], %o0		! o0 = mat->inv

	LDMATRIX_0_5_10(%o0)

	st	%g1, [%o4 + V4F_COUNT]		! dest->count = in->count

	cmp	%g1, 1
	bl	.Ltrnr_done			! count < 1: nothing to transform
	 mov	0, %o4				! (delay slot) i = 0, dest no longer needed

	/* Fold 'scale' into the three matrix elements once. */
	fmuls	M0, %f15, M0
	fmuls	M5, %f15, M5
	fmuls	M10, %f15, M10

.Ltrnr_loop:
	ld	[%o5 + 0x00], %f0		! f0 = ux
	ld	[%o5 + 0x04], %f1		! f1 = uy
	ld	[%o5 + 0x08], %f2		! f2 = uz
	add	%o5, %g2, %o5			! source cursor += stride
	add	%o4, 1, %o4			! ++i

	fmuls	%f0, M0, %f3
	st	%f3, [%g3 + 0x00]		! out[0] = ux*m0
	fmuls	%f1, M5, %f5
	st	%f5, [%g3 + 0x04]		! out[1] = uy*m5
	fmuls	%f2, M10, %f7
	st	%f7, [%g3 + 0x08]		! out[2] = uz*m10

	cmp	%o4, %g1
	bl	.Ltrnr_loop			! loop while i < count
	 add	%g3, 0x10, %g3			! (delay slot) output cursor += 16

.Ltrnr_done:
	retl
	 nop
318
319	.globl	_mesa_sparc_transform_rescale_normals
/*
 * _mesa_sparc_transform_rescale_normals
 *
 * In:   %o0 = mat, %o1 = scale (float bit pattern), %o2 = in,
 *       %o3 = lengths (not used here), %o4 = dest
 * Out:  dest->count = in->count; three floats per normal are stored at
 *       16-byte intervals starting at dest->start.
 *
 * Elements 0,1,2,4,5,6,8,9,10 loaded from mat->inv are multiplied by
 * 'scale' once, then every normal (ux,uy,uz) is written as
 *	tx = ux*m0 + uy*m1 + uz*m2
 *	ty = ux*m4 + uy*m5 + uz*m6
 *	tz = ux*m8 + uy*m9 + uz*m10
 *
 * Leaf routine (returns with retl).  Modifies %g1-%g3, %o0, %o4, %o5, the
 * integer condition codes and FP registers; %sp is lowered by 16 only
 * while the scratch slot is in use.
 *
 * NOTE(review): 'scale' is read from %o1; confirm that this is where the
 * 64-bit (__arch64__) calling convention passes a float argument.
 */
_mesa_sparc_transform_rescale_normals:
	/* Move 'scale' from an integer to an FP register through a
	 * temporary stack slot.
	 */
	sub	%sp, 16, %sp
	st	%o1, [%sp + STACK_VAR_OFF+0x0]
	ld	[%sp + STACK_VAR_OFF+0x0], %f15	! f15 = scale
	add	%sp, 16, %sp

	ld	[%o2 + V4F_COUNT], %g1		! g1 = in->count
	ld	[%o2 + V4F_STRIDE], %g2		! g2 = in->stride
	LDPTR	[%o2 + V4F_START], %o5		! o5 = in->start   (source cursor)
	LDPTR	[%o4 + V4F_START], %g3		! g3 = dest->start (output cursor)
	LDPTR	[%o0 + MATRIX_INV], %o0		! o0 = mat->inv

	LDMATRIX_0_1_2_4_5_6_8_9_10(%o0)

	st	%g1, [%o4 + V4F_COUNT]		! dest->count = in->count

	cmp	%g1, 1
	bl	.Ltrn_done			! count < 1: nothing to transform
	 mov	0, %o4				! (delay slot) i = 0, dest no longer needed

	/* Fold 'scale' into the nine matrix elements once. */
	fmuls	M0, %f15, M0
	fmuls	M1, %f15, M1
	fmuls	M2, %f15, M2
	fmuls	M4, %f15, M4
	fmuls	M5, %f15, M5
	fmuls	M6, %f15, M6
	fmuls	M8, %f15, M8
	fmuls	M9, %f15, M9
	fmuls	M10, %f15, M10

.Ltrn_loop:
	ld	[%o5 + 0x00], %f0		! f0 = ux
	ld	[%o5 + 0x04], %f1		! f1 = uy
	ld	[%o5 + 0x08], %f2		! f2 = uz
	add	%o5, %g2, %o5			! source cursor += stride
	add	%o4, 1, %o4			! ++i

	/* Products and partial sums are interleaved; f0 and f4 are reused
	 * as temporaries once ux and uy*m1 have been consumed.
	 */
	fmuls	%f0, M0, %f3			! f3  = ux*m0
	fmuls	%f1, M1, %f4			! f4  = uy*m1
	fmuls	%f0, M4, %f5			! f5  = ux*m4
	fmuls	%f1, M5, %f6			! f6  = uy*m5
	fmuls	%f0, M8, %f7			! f7  = ux*m8
	fmuls	%f1, M9, %f8			! f8  = uy*m9
	fadds	%f3, %f4, %f3			! f3  = ux*m0 + uy*m1
	fmuls	%f2, M2, %f10			! f10 = uz*m2
	fmuls	%f2, M6, %f0			! f0  = uz*m6
	fadds	%f5, %f6, %f5			! f5  = ux*m4 + uy*m5
	fmuls	%f2, M10, %f4			! f4  = uz*m10
	fadds	%f7, %f8, %f7			! f7  = ux*m8 + uy*m9
	fadds	%f3, %f10, %f3			! f3  = tx
	st	%f3, [%g3 + 0x00]		! out[0] = tx
	fadds	%f5, %f0, %f5			! f5  = ty
	st	%f5, [%g3 + 0x04]		! out[1] = ty
	fadds	%f7, %f4, %f7			! f7  = tz
	st	%f7, [%g3 + 0x08]		! out[2] = tz

	cmp	%o4, %g1
	bl	.Ltrn_loop			! loop while i < count
	 add	%g3, 0x10, %g3			! (delay slot) output cursor += 16

.Ltrn_done:
	retl
	 nop
383
384	.globl	_mesa_sparc_transform_normals_no_rot
/*
 * _mesa_sparc_transform_normals_no_rot
 *
 * In:   %o0 = mat, %o2 = in, %o4 = dest
 *       (%o1 = scale and %o3 = lengths are not used here)
 * Out:  dest->count = in->count; three floats per normal are stored at
 *       16-byte intervals starting at dest->start.
 *
 * Every normal (ux,uy,uz) is written as (ux*m0, uy*m5, uz*m10) using
 * elements 0, 5 and 10 loaded from mat->inv.
 *
 * Leaf routine (returns with retl).  Modifies %g1-%g3, %o0, %o4, %o5, the
 * integer condition codes and FP registers.
 */
_mesa_sparc_transform_normals_no_rot:
	ld	[%o2 + V4F_COUNT], %g1		! g1 = in->count
	ld	[%o2 + V4F_STRIDE], %g2		! g2 = in->stride
	LDPTR	[%o2 + V4F_START], %o5		! o5 = in->start   (source cursor)
	LDPTR	[%o4 + V4F_START], %g3		! g3 = dest->start (output cursor)
	LDPTR	[%o0 + MATRIX_INV], %o0		! o0 = mat->inv

	LDMATRIX_0_5_10(%o0)

	st	%g1, [%o4 + V4F_COUNT]		! dest->count = in->count

	cmp	%g1, 1
	bl	.Ltnr_done			! count < 1: nothing to transform
	 mov	0, %o4				! (delay slot) i = 0, dest no longer needed

.Ltnr_loop:
	ld	[%o5 + 0x00], %f0		! f0 = ux
	ld	[%o5 + 0x04], %f1		! f1 = uy
	ld	[%o5 + 0x08], %f2		! f2 = uz
	add	%o5, %g2, %o5			! source cursor += stride
	add	%o4, 1, %o4			! ++i

	fmuls	%f0, M0, %f3
	st	%f3, [%g3 + 0x00]		! out[0] = ux*m0
	fmuls	%f1, M5, %f5
	st	%f5, [%g3 + 0x04]		! out[1] = uy*m5
	fmuls	%f2, M10, %f7
	st	%f7, [%g3 + 0x08]		! out[2] = uz*m10

	cmp	%o4, %g1
	bl	.Ltnr_loop			! loop while i < count
	 add	%g3, 0x10, %g3			! (delay slot) output cursor += 16

.Ltnr_done:
	retl
	 nop
425
426	.globl	_mesa_sparc_transform_normals
/*
 * _mesa_sparc_transform_normals
 *
 * In:   %o0 = mat, %o2 = in, %o4 = dest
 *       (%o1 = scale and %o3 = lengths are not used here)
 * Out:  dest->count = in->count; three floats per normal are stored at
 *       16-byte intervals starting at dest->start.
 *
 * Every normal (ux,uy,uz) is transformed with elements
 * 0,1,2,4,5,6,8,9,10 loaded from mat->inv:
 *	tx = ux*m0 + uy*m1 + uz*m2
 *	ty = ux*m4 + uy*m5 + uz*m6
 *	tz = ux*m8 + uy*m9 + uz*m10
 *
 * Leaf routine (returns with retl).  Modifies %g1-%g3, %o0, %o4, %o5, the
 * integer condition codes and FP registers.
 */
_mesa_sparc_transform_normals:
	ld	[%o2 + V4F_COUNT], %g1		! g1 = in->count
	ld	[%o2 + V4F_STRIDE], %g2		! g2 = in->stride
	LDPTR	[%o2 + V4F_START], %o5		! o5 = in->start   (source cursor)
	LDPTR	[%o4 + V4F_START], %g3		! g3 = dest->start (output cursor)
	LDPTR	[%o0 + MATRIX_INV], %o0		! o0 = mat->inv

	LDMATRIX_0_1_2_4_5_6_8_9_10(%o0)

	st	%g1, [%o4 + V4F_COUNT]		! dest->count = in->count

	cmp	%g1, 1
	bl	.Ltn_done			! count < 1: nothing to transform
	 mov	0, %o4				! (delay slot) i = 0, dest no longer needed

.Ltn_loop:
	ld	[%o5 + 0x00], %f0		! f0 = ux
	ld	[%o5 + 0x04], %f1		! f1 = uy
	ld	[%o5 + 0x08], %f2		! f2 = uz
	add	%o5, %g2, %o5			! source cursor += stride
	add	%o4, 1, %o4			! ++i

	/* Products and partial sums are interleaved; f0 and f4 are reused
	 * as temporaries once ux and uy*m1 have been consumed.
	 */
	fmuls	%f0, M0, %f3			! f3  = ux*m0
	fmuls	%f1, M1, %f4			! f4  = uy*m1
	fmuls	%f0, M4, %f5			! f5  = ux*m4
	fmuls	%f1, M5, %f6			! f6  = uy*m5
	fmuls	%f0, M8, %f7			! f7  = ux*m8
	fmuls	%f1, M9, %f8			! f8  = uy*m9
	fadds	%f3, %f4, %f3			! f3  = ux*m0 + uy*m1
	fmuls	%f2, M2, %f10			! f10 = uz*m2
	fmuls	%f2, M6, %f0			! f0  = uz*m6
	fadds	%f5, %f6, %f5			! f5  = ux*m4 + uy*m5
	fmuls	%f2, M10, %f4			! f4  = uz*m10
	fadds	%f7, %f8, %f7			! f7  = ux*m8 + uy*m9
	fadds	%f3, %f10, %f3			! f3  = tx
	st	%f3, [%g3 + 0x00]		! out[0] = tx
	fadds	%f5, %f0, %f5			! f5  = ty
	st	%f5, [%g3 + 0x04]		! out[1] = ty
	fadds	%f7, %f4, %f7			! f7  = tz
	st	%f7, [%g3 + 0x08]		! out[2] = tz

	cmp	%o4, %g1
	bl	.Ltn_loop			! loop while i < count
	 add	%g3, 0x10, %g3			! (delay slot) output cursor += 16

.Ltn_done:
	retl
	 nop
475
476	.globl	_mesa_sparc_normalize_normals
/*
 * _mesa_sparc_normalize_normals
 *
 * In:   %o2 = in, %o3 = lengths (may be NULL), %o4 = dest
 *       (%o0 = mat and %o1 = scale are not used here)
 * Out:  dest->count = in->count; three floats per normal are stored at
 *       16-byte intervals starting at dest->start.
 *
 * No matrix is applied.  For every input normal (tx,ty,tz):
 * lengths == NULL: it is multiplied by
 *                  1.0 / sqrt(tx*tx + ty*ty + tz*tz).  There is no guard
 *                  for a zero sum of squares.
 * lengths != NULL: it is multiplied by lengths[i].
 *
 * Leaf routine (returns with retl).  Modifies %g1-%g3, %o3-%o5, the
 * integer condition codes and FP registers; %sp is lowered by 16 only
 * while the scratch slot is in use.
 */
_mesa_sparc_normalize_normals:
	/* Move 1.0f from an integer to an FP register through a temporary
	 * stack slot.
	 */
	sub	%sp, 16, %sp
	sethi	%hi(ONE_DOT_ZERO), %g2
	st	%g2, [%sp + STACK_VAR_OFF+0x0]
	ld	[%sp + STACK_VAR_OFF+0x0], %f12	! f12 = 1.0f
	add	%sp, 16, %sp

	ld	[%o2 + V4F_COUNT], %g1		! g1 = in->count
	ld	[%o2 + V4F_STRIDE], %g2		! g2 = in->stride
	LDPTR	[%o2 + V4F_START], %o5		! o5 = in->start   (source cursor)
	LDPTR	[%o4 + V4F_START], %g3		! g3 = dest->start (output cursor)

	st	%g1, [%o4 + V4F_COUNT]		! dest->count = in->count

	cmp	%g1, 1
	bl	.Lnn_done			! count < 1: nothing to do
	 tst	%o3				! (delay slot) lengths == NULL ?
	bne	.Lnn_len_loop
	 mov	0, %o4				! (delay slot) i = 0, dest no longer needed

.Lnn_norm_loop:	/* lengths == NULL: compute the reciprocal length here */
	ld	[%o5 + 0x00], %f3		! f3 = tx
	ld	[%o5 + 0x04], %f5		! f5 = ty
	ld	[%o5 + 0x08], %f7		! f7 = tz
	add	%o5, %g2, %o5			! source cursor += stride
	add	%o4, 1, %o4			! ++i

	/* f6 = tx*tx + ty*ty + tz*tz */
	fmuls	%f3, %f3, %f6
	fmuls	%f5, %f5, %f8
	fmuls	%f7, %f7, %f10
	fadds	%f6, %f8, %f6
	fadds	%f6, %f10, %f6

	/* f6 = 1.0 / sqrt(f6) */
	fsqrts	%f6, %f6
	fdivs	%f12, %f6, %f6

	fmuls	%f3, %f6, %f3
	st	%f3, [%g3 + 0x00]		! out[0] = tx / |t|
	fmuls	%f5, %f6, %f5
	st	%f5, [%g3 + 0x04]		! out[1] = ty / |t|
	fmuls	%f7, %f6, %f7
	st	%f7, [%g3 + 0x08]		! out[2] = tz / |t|

	cmp	%o4, %g1
	bl	.Lnn_norm_loop			! loop while i < count
	 add	%g3, 0x10, %g3			! (delay slot) output cursor += 16

	ba	.Lnn_done
	 nop

.Lnn_len_loop:	/* lengths != NULL: multiply by the supplied factor */
	ld	[%o5 + 0x00], %f3		! f3 = tx
	ld	[%o5 + 0x04], %f5		! f5 = ty
	ld	[%o5 + 0x08], %f7		! f7 = tz
	add	%o5, %g2, %o5			! source cursor += stride
	add	%o4, 1, %o4			! ++i

	ld	[%o3], %f13			! f13 = *lengths
	add	%o3, 4, %o3			! ++lengths

	fmuls	%f3, %f13, %f3
	st	%f3, [%g3 + 0x00]		! out[0] = tx * lengths[i]
	fmuls	%f5, %f13, %f5
	st	%f5, [%g3 + 0x04]		! out[1] = ty * lengths[i]
	fmuls	%f7, %f13, %f7
	st	%f7, [%g3 + 0x08]		! out[2] = tz * lengths[i]

	cmp	%o4, %g1
	bl	.Lnn_len_loop			! loop while i < count
	 add	%g3, 0x10, %g3			! (delay slot) output cursor += 16

.Lnn_done:
	retl
	 nop
561
562	.globl	_mesa_sparc_rescale_normals
/*
 * _mesa_sparc_rescale_normals
 *
 * In:   %o1 = scale (float bit pattern), %o2 = in, %o4 = dest
 *       (%o0 = mat and %o3 = lengths are not used here)
 * Out:  dest->count = in->count; three floats per normal are stored at
 *       16-byte intervals starting at dest->start.
 *
 * No matrix is applied: every input normal is multiplied by 'scale'.
 *
 * Leaf routine (returns with retl).  Modifies %g1-%g3, %o4, %o5, the
 * integer condition codes and %f3, %f5, %f7, %f15; %sp is lowered by 16
 * only while the scratch slot is in use.
 *
 * The 1.0f constant is not needed by this routine, so it is no longer
 * loaded into %g2 (the value was overwritten by in->stride before any
 * use).
 *
 * NOTE(review): 'scale' is read from %o1; confirm that this is where the
 * 64-bit (__arch64__) calling convention passes a float argument.
 */
_mesa_sparc_rescale_normals:
	/* Move 'scale' from an integer to an FP register through a
	 * temporary stack slot.
	 */
	sub	%sp, 16, %sp
	st	%o1, [%sp + STACK_VAR_OFF+0x0]
	ld	[%sp + STACK_VAR_OFF+0x0], %f15	! f15 = scale
	add	%sp, 16, %sp

	LDPTR	[%o2 + V4F_START], %o5		! o5 = in->start   (source cursor)
	ld	[%o2 + V4F_COUNT], %g1		! g1 = in->count
	ld	[%o2 + V4F_STRIDE], %g2		! g2 = in->stride
	LDPTR	[%o4 + V4F_START], %g3		! g3 = dest->start (output cursor)

	/* dest->count = in->count */
	st	%g1, [%o4 + V4F_COUNT]

	cmp	%g1, 1
	bl	7f				! count < 1: nothing to do
	 clr	%o4				! (delay slot) i = 0, dest no longer needed

1:
	ld	[%o5 + 0x00], %f3		! f3 = tx
	ld	[%o5 + 0x04], %f5		! f5 = ty
	ld	[%o5 + 0x08], %f7		! f7 = tz
	add	%o5, %g2, %o5			! source cursor += stride
	add	%o4, 1, %o4			! i++

	fmuls	%f3, %f15, %f3
	st	%f3, [%g3 + 0x00]		! out[i][0] = tx * scale
	fmuls	%f5, %f15, %f5
	st	%f5, [%g3 + 0x04]		! out[i][1] = ty * scale
	fmuls	%f7, %f15, %f7
	st	%f7, [%g3 + 0x08]		! out[i][2] = tz * scale

	cmp	%o4, %g1			! continue if (i < count)
	bl	1b
	 add	%g3, 0x10, %g3			! (delay slot) output cursor += 16

7:	retl
	 nop
606