1 /*
2  * Copyright (C) 2008 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 /* ---- includes ----------------------------------------------------------- */
18 
19 #include "b_TensorEm/Flt16Mat3D.h"
20 #include "b_TensorEm/Functions.h"
21 #include "b_BasicEm/Math.h"
22 
23 /* ------------------------------------------------------------------------- */
24 
25 /* ========================================================================= */
26 /*                                                                           */
27 /* ---- \ghd{ auxiliary functions } ---------------------------------------- */
28 /*                                                                           */
29 /* ========================================================================= */
30 
31 /* ------------------------------------------------------------------------- */
32 
33 /* ========================================================================= */
34 /*                                                                           */
35 /* ---- \ghd{ constructor / destructor } ----------------------------------- */
36 /*                                                                           */
37 /* ========================================================================= */
38 
39 /* ------------------------------------------------------------------------- */
40 
bts_Flt16Mat3D_init(struct bts_Flt16Mat3D * ptrA)41 void bts_Flt16Mat3D_init( struct bts_Flt16Mat3D* ptrA )
42 {
43 	ptrA->bbpE = 0;
44 	ptrA->xxE = 0;
45 	ptrA->xyE = 0;
46 	ptrA->xzE = 0;
47 	ptrA->yxE = 0;
48 	ptrA->yyE = 0;
49 	ptrA->yzE = 0;
50 	ptrA->zxE = 0;
51 	ptrA->zyE = 0;
52 	ptrA->zzE = 0;
53 }
54 
55 /* ------------------------------------------------------------------------- */
56 
bts_Flt16Mat3D_exit(struct bts_Flt16Mat3D * ptrA)57 void bts_Flt16Mat3D_exit( struct bts_Flt16Mat3D* ptrA )
58 {
59 	ptrA->bbpE = 0;
60 	ptrA->xxE = 0;
61 	ptrA->xyE = 0;
62 	ptrA->xzE = 0;
63 	ptrA->yxE = 0;
64 	ptrA->yyE = 0;
65 	ptrA->yzE = 0;
66 	ptrA->zxE = 0;
67 	ptrA->zyE = 0;
68 	ptrA->zzE = 0;
69 }
70 
71 /* ------------------------------------------------------------------------- */
72 
73 /* ========================================================================= */
74 /*                                                                           */
75 /* ---- \ghd{ operators } -------------------------------------------------- */
76 /*                                                                           */
77 /* ========================================================================= */
78 
79 /* ------------------------------------------------------------------------- */
80 
81 /* ========================================================================= */
82 /*                                                                           */
83 /* ---- \ghd{ query functions } -------------------------------------------- */
84 /*                                                                           */
85 /* ========================================================================= */
86 
87 /* ------------------------------------------------------------------------- */
88 
89 /* ========================================================================= */
90 /*                                                                           */
91 /* ---- \ghd{ modify functions } ------------------------------------------- */
92 /*                                                                           */
93 /* ========================================================================= */
94 
95 /* ------------------------------------------------------------------------- */
96 
97 /* ========================================================================= */
98 /*                                                                           */
99 /* ---- \ghd{ I/O } -------------------------------------------------------- */
100 /*                                                                           */
101 /* ========================================================================= */
102 
103 /* ------------------------------------------------------------------------- */
104 
bts_Flt16Mat3D_memSize(struct bbs_Context * cpA,const struct bts_Flt16Mat3D * ptrA)105 uint32 bts_Flt16Mat3D_memSize( struct bbs_Context* cpA,
106 							   const struct bts_Flt16Mat3D *ptrA )
107 {
108 	return bbs_SIZEOF16( *ptrA );
109 }
110 
111 /* ------------------------------------------------------------------------- */
112 
bts_Flt16Mat3D_memWrite(struct bbs_Context * cpA,const struct bts_Flt16Mat3D * ptrA,uint16 * memPtrA)113 uint32 bts_Flt16Mat3D_memWrite( struct bbs_Context* cpA,
114 							    const struct bts_Flt16Mat3D* ptrA,
115 								uint16* memPtrA )
116 {
117 	bbs_ERROR0( "not implemented" );
118 	return 0;
119 }
120 
121 /* ------------------------------------------------------------------------- */
122 
bts_Flt16Mat3D_memRead(struct bbs_Context * cpA,struct bts_Flt16Mat3D * ptrA,const uint16 * memPtrA)123 uint32 bts_Flt16Mat3D_memRead( struct bbs_Context* cpA,
124 							   struct bts_Flt16Mat3D* ptrA,
125 							   const uint16* memPtrA )
126 {
127 	if( bbs_Context_error( cpA ) ) return 0;
128 	bbs_ERROR0( "not implemented" );
129 	return 0;
130 }
131 
132 /* ------------------------------------------------------------------------- */
133 
134 /* ========================================================================= */
135 /*                                                                           */
136 /* ---- \ghd{ exec functions } --------------------------------------------- */
137 /*                                                                           */
138 /* ========================================================================= */
139 
140 /* ------------------------------------------------------------------------- */
141 
bts_Flt16Mat3D_createIdentity()142 struct bts_Flt16Mat3D bts_Flt16Mat3D_createIdentity()
143 {
144 	struct bts_Flt16Mat3D matL = { 1 << 14, 0, 0, 0, 1 << 14, 0, 0, 0, 1 << 14, 14 };
145 	return matL;
146 }
147 
148 /* ------------------------------------------------------------------------- */
149 
/**
 * Creates a scaling matrix: starts from the matrix returned by
 * bts_Flt16Mat3D_createIdentity and applies bts_Flt16Mat3D_scale to it.
 *
 * @param scaleA     scale factor, forwarded to bts_Flt16Mat3D_scale
 * @param scaleBbpA  shift count belonging to scaleA, forwarded to
 *                   bts_Flt16Mat3D_scale
 * @return the scaled matrix, by value
 */
struct bts_Flt16Mat3D bts_Flt16Mat3D_createScale( int32 scaleA, int32 scaleBbpA )
{
	struct bts_Flt16Mat3D scaledL = bts_Flt16Mat3D_createIdentity();

	bts_Flt16Mat3D_scale( &scaledL, scaleA, scaleBbpA );

	return scaledL;
}
156 
157 /* ------------------------------------------------------------------------- */
158 
/**
 * Builds a matrix directly from nine 16-bit elements and a shift count.
 * The values are stored as given; no normalization is performed.
 *
 * @param xxA..zzA  matrix elements; the first letter of each name selects
 *                  the row and the second the column of the member written
 * @param bbpA      value stored in the bbpE member
 * @return the assembled matrix, by value
 */
struct bts_Flt16Mat3D bts_Flt16Mat3D_create16( int16 xxA, int16 xyA, int16 xzA,
											   int16 yxA, int16 yyA, int16 yzA,
											   int16 zxA, int16 zyA, int16 zzA,
											   int16 bbpA )
{
	struct bts_Flt16Mat3D resultL;

	resultL.bbpE = bbpA;

	/* x row */
	resultL.xxE = xxA;
	resultL.xyE = xyA;
	resultL.xzE = xzA;

	/* y row */
	resultL.yxE = yxA;
	resultL.yyE = yyA;
	resultL.yzE = yzA;

	/* z row */
	resultL.zxE = zxA;
	resultL.zyE = zyA;
	resultL.zzE = zzA;

	return resultL;
}
177 
178 /* ------------------------------------------------------------------------- */
179 
/**
 * Builds a matrix from nine 32-bit elements sharing one shift count,
 * rescaling all elements by a common power of two before storing them.
 *
 * The common shift is derived from bts_maxAbsIntLog2Of3 of each row minus
 * 13 (presumably so that the largest magnitude lands just inside the
 * 16-bit element range -- confirm against bts_maxAbsIntLog2Of3). A positive
 * shift scales the elements down with rounding; a zero or negative shift
 * scales them up. The stored bbpE is bbpA minus that shift, so the
 * represented values are unchanged apart from rounding.
 *
 * If all nine elements are zero, the result has every member, including
 * bbpE, set to zero and bbpA is ignored.
 *
 * @param xxA..zzA  matrix elements; the first letter of each name selects
 *                  the row and the second the column of the member written
 * @param bbpA      shift count that belongs to the given elements
 * @return the normalized matrix, by value
 */
struct bts_Flt16Mat3D bts_Flt16Mat3D_create32( int32 xxA, int32 xyA, int32 xzA,
											   int32 yxA, int32 yyA, int32 yzA,
											   int32 zxA, int32 zyA, int32 zzA,
											   int32 bbpA )
{
	struct bts_Flt16Mat3D matL;

	if( ( xxA | xyA | xzA | yxA | yyA | yzA | zxA | zyA | zzA ) == 0 )
	{
		/* all-zero input: there is no magnitude to normalize against */
		matL.xxE = 0;
		matL.xyE = 0;
		matL.xzE = 0;
		matL.yxE = 0;
		matL.yyE = 0;
		matL.yzE = 0;
		matL.zxE = 0;
		matL.zyE = 0;
		matL.zzE = 0;
		matL.bbpE = 0;
	}
	else
	{
		/* per-row shift requirement; the largest one is applied to all rows */
		int32 xShiftL = bts_maxAbsIntLog2Of3( xxA, xyA, xzA ) - 13;
		int32 yShiftL = bts_maxAbsIntLog2Of3( yxA, yyA, yzA ) - 13;
		int32 zShiftL = bts_maxAbsIntLog2Of3( zxA, zyA, zzA ) - 13;

		int32 shiftL = bbs_max( bbs_max( xShiftL, yShiftL ), zShiftL );

		if( shiftL > 0 )
		{
			/* scale down: shift by one bit less, add 1, drop the last bit
			   -> rounds to nearest instead of truncating */
			int32 sh1L = shiftL - 1;
			matL.xxE = ( ( xxA >> sh1L ) + 1 ) >> 1;
			matL.xyE = ( ( xyA >> sh1L ) + 1 ) >> 1;
			matL.xzE = ( ( xzA >> sh1L ) + 1 ) >> 1;
			matL.yxE = ( ( yxA >> sh1L ) + 1 ) >> 1;
			matL.yyE = ( ( yyA >> sh1L ) + 1 ) >> 1;
			matL.yzE = ( ( yzA >> sh1L ) + 1 ) >> 1;
			matL.zxE = ( ( zxA >> sh1L ) + 1 ) >> 1;
			matL.zyE = ( ( zyA >> sh1L ) + 1 ) >> 1;
			matL.zzE = ( ( zzA >> sh1L ) + 1 ) >> 1;
		}
		else
		{
			/* scale up; the shift is done on the unsigned representation
			   because left-shifting a negative signed value is undefined
			   behaviour in C, while the unsigned shift yields the same bits */
			int32 upL = -shiftL;
			matL.xxE = ( int32 )( ( uint32 )xxA << upL );
			matL.xyE = ( int32 )( ( uint32 )xyA << upL );
			matL.xzE = ( int32 )( ( uint32 )xzA << upL );
			matL.yxE = ( int32 )( ( uint32 )yxA << upL );
			matL.yyE = ( int32 )( ( uint32 )yyA << upL );
			matL.yzE = ( int32 )( ( uint32 )yzA << upL );
			matL.zxE = ( int32 )( ( uint32 )zxA << upL );
			matL.zyE = ( int32 )( ( uint32 )zyA << upL );
			matL.zzE = ( int32 )( ( uint32 )zzA << upL );
		}

		/* compensate the element scaling in the shift count */
		matL.bbpE = bbpA - shiftL;
	}
	return matL;
}
238 
239 /* ------------------------------------------------------------------------- */
240 
/**
 * Multiplies every element of a matrix by a fixed-point scale factor, in
 * place.
 *
 * If bts_absIntLog2( scaleA ) exceeds 14, the factor is first reduced
 * (with rounding) by the excess number of bits and scaleBbpA is lowered by
 * the same amount, so the factor stays small before it is multiplied with
 * the elements. The products are then handed to bts_Flt16Mat3D_create32
 * together with the summed shift counts, and the result replaces *ptrA.
 *
 * @param ptrA       matrix to scale; read completely before it is
 *                   overwritten with the result
 * @param scaleA     scale factor
 * @param scaleBbpA  shift count that belongs to scaleA
 */
void bts_Flt16Mat3D_scale( struct bts_Flt16Mat3D* ptrA, int32 scaleA, int32 scaleBbpA )
{
	uint32 log2L = bts_absIntLog2( scaleA );
	struct bts_Flt16Mat3D scaledL;

	/* fit scale in 15 bit */
	if( log2L > 14 )
	{
		int32 excessL = log2L - 14;
		int32 halfShiftL = excessL - 1;

		/* shift by one bit less, add 1, drop the last bit -> round to nearest */
		scaleA = ( ( scaleA >> halfShiftL ) + 1 ) >> 1;
		scaleBbpA -= excessL;
	}

	scaledL = bts_Flt16Mat3D_create32(
		ptrA->xxE * scaleA, ptrA->xyE * scaleA, ptrA->xzE * scaleA,
		ptrA->yxE * scaleA, ptrA->yyE * scaleA, ptrA->yzE * scaleA,
		ptrA->zxE * scaleA, ptrA->zyE * scaleA, ptrA->zzE * scaleA,
		ptrA->bbpE + scaleBbpA );

	*ptrA = scaledL;
}
257 
258 /* ------------------------------------------------------------------------- */
259 #ifndef HW_EE /* causes internal compiler error in ee-gcc */
bts_Flt16Mat3D_map(const struct bts_Flt16Mat3D * matPtrA,const struct bts_Int16Vec3D * vecPtrA)260 struct bts_Int16Vec3D bts_Flt16Mat3D_map( const struct bts_Flt16Mat3D* matPtrA,
261 								          const struct bts_Int16Vec3D* vecPtrA )
262 {
263 	struct bts_Int16Vec3D vecL;
264 
265 	int32 xL = ( int32 ) matPtrA->xxE * vecPtrA->xE + ( int32 ) matPtrA->xyE * vecPtrA->yE + ( int32 ) matPtrA->xzE * vecPtrA->zE;
266 	int32 yL = ( int32 ) matPtrA->yxE * vecPtrA->xE + ( int32 ) matPtrA->yyE * vecPtrA->yE + ( int32 ) matPtrA->yzE * vecPtrA->zE;
267 	int32 zL = ( int32 ) matPtrA->zxE * vecPtrA->xE + ( int32 ) matPtrA->zyE * vecPtrA->yE + ( int32 ) matPtrA->zzE * vecPtrA->zE;
268 
269 	if( matPtrA->bbpE > 0 )
270 	{
271 		int32 sh1L = matPtrA->bbpE - 1;
272 		vecL.xE = ( ( xL >> sh1L ) + 1 ) >> 1;
273 		vecL.yE = ( ( yL >> sh1L ) + 1 ) >> 1;
274 		vecL.zE = ( ( zL >> sh1L ) + 1 ) >> 1;
275 	}
276 	else
277 	{
278 		/* not overflow safe */
279 		vecL.xE = xL << -matPtrA->bbpE;
280 		vecL.yE = yL << -matPtrA->bbpE;
281 		vecL.zE = zL << -matPtrA->bbpE;
282 	}
283 
284 	return vecL;
285 }
286 #endif
287 /* ------------------------------------------------------------------------- */
288 
bts_Flt16Mat3D_mapFlt(const struct bts_Flt16Mat3D * matPtrA,const struct bts_Flt16Vec3D * vecPtrA)289 struct bts_Flt16Vec3D bts_Flt16Mat3D_mapFlt( const struct bts_Flt16Mat3D* matPtrA,
290 								             const struct bts_Flt16Vec3D* vecPtrA )
291 {
292 	/* avoids overflow summing intermediate products */
293 	int32 xL = ( ( ( ( int32 ) matPtrA->xxE * vecPtrA->xE + 1 ) >> 1 ) +
294 				 ( ( ( int32 ) matPtrA->xyE * vecPtrA->yE + 1 ) >> 1 ) +
295 				 ( ( ( int32 ) matPtrA->xzE * vecPtrA->zE + 1 ) >> 1 ) );
296 
297 	int32 yL = ( ( ( ( int32 ) matPtrA->yxE * vecPtrA->xE + 1 ) >> 1 ) +
298 				 ( ( ( int32 ) matPtrA->yyE * vecPtrA->yE + 1 ) >> 1 ) +
299 				 ( ( ( int32 ) matPtrA->yzE * vecPtrA->zE + 1 ) >> 1 ) );
300 
301 	int32 zL = ( ( ( ( int32 ) matPtrA->zxE * vecPtrA->xE + 1 ) >> 1 ) +
302 				 ( ( ( int32 ) matPtrA->zyE * vecPtrA->yE + 1 ) >> 1 ) +
303 				 ( ( ( int32 ) matPtrA->zzE * vecPtrA->zE + 1 ) >> 1 ) );
304 
305 
306 	return bts_Flt16Vec3D_create32( xL, yL, zL, vecPtrA->bbpE + matPtrA->bbpE - 1 );
307 }
308 
309 /* ------------------------------------------------------------------------- */
310 
bts_Flt16Mat3D_mul(const struct bts_Flt16Mat3D * mat1PtrA,const struct bts_Flt16Mat3D * mat2PtrA)311 struct bts_Flt16Mat3D bts_Flt16Mat3D_mul( const struct bts_Flt16Mat3D* mat1PtrA,
312 								          const struct bts_Flt16Mat3D* mat2PtrA )
313 {
314 	/* avoids overflow summing intermediate products */
315 	return bts_Flt16Mat3D_create32(
316 
317 		( ( ( int32 ) mat1PtrA->xxE * mat2PtrA->xxE + 1 ) >> 1 ) + ( ( ( int32 ) mat1PtrA->xyE * mat2PtrA->yxE + 1 ) >> 1 ) + ( ( ( int32 ) mat1PtrA->xzE * mat2PtrA->zxE + 1 ) >> 1 ),
318 		( ( ( int32 ) mat1PtrA->xxE * mat2PtrA->xyE + 1 ) >> 1 ) + ( ( ( int32 ) mat1PtrA->xyE * mat2PtrA->yyE + 1 ) >> 1 ) + ( ( ( int32 ) mat1PtrA->xzE * mat2PtrA->zyE + 1 ) >> 1 ),
319 		( ( ( int32 ) mat1PtrA->xxE * mat2PtrA->xzE + 1 ) >> 1 ) + ( ( ( int32 ) mat1PtrA->xyE * mat2PtrA->yzE + 1 ) >> 1 ) + ( ( ( int32 ) mat1PtrA->xzE * mat2PtrA->zzE + 1 ) >> 1 ),
320 
321 		( ( ( int32 ) mat1PtrA->yxE * mat2PtrA->xxE + 1 ) >> 1 ) + ( ( ( int32 ) mat1PtrA->yyE * mat2PtrA->yxE + 1 ) >> 1 ) + ( ( ( int32 ) mat1PtrA->yzE * mat2PtrA->zxE + 1 ) >> 1 ),
322 		( ( ( int32 ) mat1PtrA->yxE * mat2PtrA->xyE + 1 ) >> 1 ) + ( ( ( int32 ) mat1PtrA->yyE * mat2PtrA->yyE + 1 ) >> 1 ) + ( ( ( int32 ) mat1PtrA->yzE * mat2PtrA->zyE + 1 ) >> 1 ),
323 		( ( ( int32 ) mat1PtrA->yxE * mat2PtrA->xzE + 1 ) >> 1 ) + ( ( ( int32 ) mat1PtrA->yyE * mat2PtrA->yzE + 1 ) >> 1 ) + ( ( ( int32 ) mat1PtrA->yzE * mat2PtrA->zzE + 1 ) >> 1 ),
324 
325 		( ( ( int32 ) mat1PtrA->zxE * mat2PtrA->xxE + 1 ) >> 1 ) + ( ( ( int32 ) mat1PtrA->zyE * mat2PtrA->yxE + 1 ) >> 1 ) + ( ( ( int32 ) mat1PtrA->zzE * mat2PtrA->zxE + 1 ) >> 1 ),
326 		( ( ( int32 ) mat1PtrA->zxE * mat2PtrA->xyE + 1 ) >> 1 ) + ( ( ( int32 ) mat1PtrA->zyE * mat2PtrA->yyE + 1 ) >> 1 ) + ( ( ( int32 ) mat1PtrA->zzE * mat2PtrA->zyE + 1 ) >> 1 ),
327 		( ( ( int32 ) mat1PtrA->zxE * mat2PtrA->xzE + 1 ) >> 1 ) + ( ( ( int32 ) mat1PtrA->zyE * mat2PtrA->yzE + 1 ) >> 1 ) + ( ( ( int32 ) mat1PtrA->zzE * mat2PtrA->zzE + 1 ) >> 1 ),
328 
329 		mat1PtrA->bbpE + mat2PtrA->bbpE - 1 );
330 }
331 
332 /* ------------------------------------------------------------------------- */
333 
bts_Flt16Mat3D_mulTo(struct bts_Flt16Mat3D * mat1PtrA,const struct bts_Flt16Mat3D * mat2PtrA)334 struct bts_Flt16Mat3D* bts_Flt16Mat3D_mulTo( struct bts_Flt16Mat3D* mat1PtrA,
335 				                             const struct bts_Flt16Mat3D* mat2PtrA )
336 {
337 	*mat1PtrA = bts_Flt16Mat3D_mul( mat1PtrA, mat2PtrA );
338 	return mat1PtrA;
339 }
340 
341 /* ------------------------------------------------------------------------- */
342 
343 /* ========================================================================= */
344 
345