1 /*-------------------------------------------------------------------------
2  * drawElements Base Portability Library
3  * -------------------------------------
4  *
5  * Copyright 2014 The Android Open Source Project
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  *      http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  *
19  *//*!
20  * \file
21  * \brief 32-bit integer math.
22  *//*--------------------------------------------------------------------*/
23 
24 #include "deInt32.h"
25 
26 DE_BEGIN_EXTERN_C
27 
28 const deInt8 g_clzLUT[256] =
29 {
30 	8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
31 	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
32 	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
33 	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
34 	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
35 	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
36 	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
37 	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
38 	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
39 	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
40 	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
41 	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
42 	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
43 	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
44 	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
45 	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
46 };
47 
48 /*--------------------------------------------------------------------*//*!
49  * \brief Compute the reciprocal of an integer.
50  * \param a		Input value.
51  * \param rcp	Pointer to resulting reciprocal "mantissa" value.
52  * \param exp	Pointer to resulting exponent value.
53  *
54  * The returned value is split into an exponent part and a mantissa part for
55  * practical reasons. The actual reciprocal can be computed with the formula:
56  * result = exp2(exp) * rcp / (1<<DE_RCP_FRAC_BITS).
57  *//*--------------------------------------------------------------------*/
deRcp32(deUint32 a,deUint32 * rcp,int * exp)58 void deRcp32 (deUint32 a, deUint32* rcp, int* exp)
59 {
60 	enum { RCP_LUT_BITS = 8 };
61 	static const deUint32 s_rcpLUT[1<<RCP_LUT_BITS] =
62 	{
63 		0x40000000, 0x3fc03fc0, 0x3f80fe03, 0x3f423954,
64 		0x3f03f03f, 0x3ec62159, 0x3e88cb3c, 0x3e4bec88,
65 		0x3e0f83e0, 0x3dd38ff0, 0x3d980f66, 0x3d5d00f5,
66 		0x3d226357, 0x3ce8354b, 0x3cae7592, 0x3c7522f3,
67 		0x3c3c3c3c, 0x3c03c03c, 0x3bcbadc7, 0x3b9403b9,
68 		0x3b5cc0ed, 0x3b25e446, 0x3aef6ca9, 0x3ab95900,
69 		0x3a83a83a, 0x3a4e5947, 0x3a196b1e, 0x39e4dcb8,
70 		0x39b0ad12, 0x397cdb2c, 0x3949660a, 0x39164cb5,
71 		0x38e38e38, 0x38b129a2, 0x387f1e03, 0x384d6a72,
72 		0x381c0e07, 0x37eb07dd, 0x37ba5713, 0x3789facb,
73 		0x3759f229, 0x372a3c56, 0x36fad87b, 0x36cbc5c7,
74 		0x369d0369, 0x366e9095, 0x36406c80, 0x36129663,
75 		0x35e50d79, 0x35b7d0ff, 0x358ae035, 0x355e3a5f,
76 		0x3531dec0, 0x3505cca2, 0x34da034d, 0x34ae820e,
77 		0x34834834, 0x3458550f, 0x342da7f2, 0x34034034,
78 		0x33d91d2a, 0x33af3e2e, 0x3385a29d, 0x335c49d4,
79 		0x33333333, 0x330a5e1b, 0x32e1c9f0, 0x32b97617,
80 		0x329161f9, 0x32698cff, 0x3241f693, 0x321a9e24,
81 		0x31f3831f, 0x31cca4f5, 0x31a6031a, 0x317f9d00,
82 		0x3159721e, 0x313381ec, 0x310dcbe1, 0x30e84f79,
83 		0x30c30c30, 0x309e0184, 0x30792ef5, 0x30549403,
84 		0x30303030, 0x300c0300, 0x2fe80bfa, 0x2fc44aa2,
85 		0x2fa0be82, 0x2f7d6724, 0x2f5a4411, 0x2f3754d7,
86 		0x2f149902, 0x2ef21023, 0x2ecfb9c8, 0x2ead9584,
87 		0x2e8ba2e8, 0x2e69e18a, 0x2e4850fe, 0x2e26f0db,
88 		0x2e05c0b8, 0x2de4c02d, 0x2dc3eed6, 0x2da34c4d,
89 		0x2d82d82d, 0x2d629215, 0x2d4279a2, 0x2d228e75,
90 		0x2d02d02d, 0x2ce33e6c, 0x2cc3d8d4, 0x2ca49f0a,
91 		0x2c8590b2, 0x2c66ad71, 0x2c47f4ee, 0x2c2966d0,
92 		0x2c0b02c0, 0x2becc868, 0x2bceb771, 0x2bb0cf87,
93 		0x2b931057, 0x2b75798c, 0x2b580ad6, 0x2b3ac3e2,
94 		0x2b1da461, 0x2b00ac02, 0x2ae3da78, 0x2ac72f74,
95 		0x2aaaaaaa, 0x2a8e4bcd, 0x2a721291, 0x2a55fead,
96 		0x2a3a0fd5, 0x2a1e45c2, 0x2a02a02a, 0x29e71ec5,
97 		0x29cbc14e, 0x29b0877d, 0x2995710e, 0x297a7dbb,
98 		0x295fad40, 0x2944ff5a, 0x292a73c7, 0x29100a44,
99 		0x28f5c28f, 0x28db9c68, 0x28c1978f, 0x28a7b3c5,
100 		0x288df0ca, 0x28744e61, 0x285acc4b, 0x28416a4c,
101 		0x28282828, 0x280f05a2, 0x27f6027f, 0x27dd1e85,
102 		0x27c45979, 0x27abb323, 0x27932b48, 0x277ac1b2,
103 		0x27627627, 0x274a4870, 0x27323858, 0x271a45a6,
104 		0x27027027, 0x26eab7a3, 0x26d31be7, 0x26bb9cbf,
105 		0x26a439f6, 0x268cf359, 0x2675c8b6, 0x265eb9da,
106 		0x2647c694, 0x2630eeb1, 0x261a3202, 0x26039055,
107 		0x25ed097b, 0x25d69d43, 0x25c04b80, 0x25aa1402,
108 		0x2593f69b, 0x257df31c, 0x2568095a, 0x25523925,
109 		0x253c8253, 0x2526e4b7, 0x25116025, 0x24fbf471,
110 		0x24e6a171, 0x24d166f9, 0x24bc44e1, 0x24a73afd,
111 		0x24924924, 0x247d6f2e, 0x2468acf1, 0x24540245,
112 		0x243f6f02, 0x242af300, 0x24168e18, 0x24024024,
113 		0x23ee08fb, 0x23d9e878, 0x23c5de76, 0x23b1eace,
114 		0x239e0d5b, 0x238a45f8, 0x23769480, 0x2362f8cf,
115 		0x234f72c2, 0x233c0233, 0x2328a701, 0x23156107,
116 		0x23023023, 0x22ef1432, 0x22dc0d12, 0x22c91aa1,
117 		0x22b63cbe, 0x22a37347, 0x2290be1c, 0x227e1d1a,
118 		0x226b9022, 0x22591713, 0x2246b1ce, 0x22346033,
119 		0x22222222, 0x220ff77c, 0x21fde021, 0x21ebdbf5,
120 		0x21d9ead7, 0x21c80cab, 0x21b64151, 0x21a488ac,
121 		0x2192e29f, 0x21814f0d, 0x216fcdd8, 0x215e5ee4,
122 		0x214d0214, 0x213bb74d, 0x212a7e72, 0x21195766,
123 		0x21084210, 0x20f73e53, 0x20e64c14, 0x20d56b38,
124 		0x20c49ba5, 0x20b3dd40, 0x20a32fef, 0x20929398,
125 		0x20820820, 0x20718d6f, 0x2061236a, 0x2050c9f8,
126 		0x20408102, 0x2030486c, 0x20202020, 0x20100804
127 	};
128 
129 	DE_STATIC_ASSERT(DE_LENGTH_OF_ARRAY(s_rcpLUT) == (1<<RCP_LUT_BITS));
130 
131 	int			shift		= deClz32(a);
132 	deUint32	normalized	= (deUint32)a << shift; /* Highest bit is always 1. */
133 	int			lookupNdx	= (normalized >> (31 - RCP_LUT_BITS)) & ((1<<RCP_LUT_BITS)-1); /* Discard high bit, leave 8 next highest bits to lowest bits of normalized. */
134 	deUint32	result;
135 	deUint32	tmp;
136 
137 	/* Must be non-negative. */
138 	DE_ASSERT(a > 0);
139 
140 	/* First approximation from lookup table. */
141 	DE_ASSERT(lookupNdx >= 0 && lookupNdx < (1<<RCP_LUT_BITS));
142 	result = s_rcpLUT[lookupNdx];
143 
144 	/* Newton-Raphson iteration for improved accuracy (has 16 bits of accuracy afterwards). */
145 	tmp		= deSafeMuluAsr32(result, normalized, 31);
146 	result	= deSafeMuluAsr32(result, (2u<<DE_RCP_FRAC_BITS) - tmp, DE_RCP_FRAC_BITS);
147 
148 	/* Second Newton-Raphson iteration (now has full accuracy). */
149 	tmp		= deSafeMuluAsr32(result, normalized, 31);
150 	result	= deSafeMuluAsr32(result, (2u<<DE_RCP_FRAC_BITS) - tmp, DE_RCP_FRAC_BITS);
151 
152 	/* Return results. */
153 	*rcp = result;
154 	*exp = 31 - shift;
155 }
156 
157 DE_END_EXTERN_C
158