1 /*
2  * Copyright (C) 2008 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 /* ---- includes ----------------------------------------------------------- */
18 
19 #include "b_TensorEm/CompactMat.h"
20 #include "b_TensorEm/Functions.h"
21 #include "b_BasicEm/Math.h"
22 #include "b_BasicEm/Functions.h"
23 #include "b_BasicEm/Memory.h"
24 
25 /* ------------------------------------------------------------------------- */
26 
27 /* ========================================================================= */
28 /*                                                                           */
29 /* ---- \ghd{ auxiliary functions } ---------------------------------------- */
30 /*                                                                           */
31 /* ========================================================================= */
32 
33 /* ------------------------------------------------------------------------- */
34 
35 /** Returns the dot product of inVecA with the compressed matrix row rowA.
 *
 *  The row starts at cpsArrE.arrPtrE + wordsPerRowE * rowA. It begins with a
 *  five-word header (input offset, element count, factor mantissa, factor
 *  exponent, row norm bits) which is followed by the packed row values,
 *  bitsPerValueE bits each.
 *
 *  cpA         - context; not referenced inside this function body
 *  ptrA        - matrix that owns the row
 *  inVecA      - input vector; elements are read starting at the row's offset
 *  inNormBitsA - added to the row norm bits of the header to decide whether
 *                the partial products have to be scaled down before summation
 *  rowA        - index of the row to evaluate
 *
 *  The result is a floating point expression:
 *		upper 16 bit: signed value
 *		lower 16 bit: signed exponent
 */
bts_CompactMat_fltDotPrdRow(struct bbs_Context * cpA,struct bts_CompactMat * ptrA,const int16 * inVecA,uint32 inNormBitsA,uint32 rowA)40 int32 bts_CompactMat_fltDotPrdRow( struct bbs_Context* cpA,
41 								   struct bts_CompactMat* ptrA,
42 							       const int16* inVecA,
43 							       uint32 inNormBitsA,
44 							       uint32 rowA )
45 {
	/* rows are stored back to back, wordsPerRowE int16 words each */
46 	const int16* rowPtrL = ptrA->cpsArrE.arrPtrE + ptrA->wordsPerRowE * rowA;
47 
48 	/* extract row-header info */
	/* (five consecutive words; rowPtrL points at the packed values afterwards) */
49 	uint32 offsL = *rowPtrL++;
50 	uint32 sizeL = *rowPtrL++;
51 	int32 factorManL = *rowPtrL++;
52 	int32 factorExpL = *rowPtrL++;
53 	uint32 rowNormBitsL = *rowPtrL++;
54 
55 	/* consider possible overflow */
	/* When the two norm-bit counts add up to 31 or more, overflowBitsL is the
	 * excess over 31. In that case every partial sum is shifted right by
	 * overflowBitsL (second branch below) and the same amount is added back
	 * to the result exponent at the end.
	 */
56 	uint16 overflowBitsL = ( inNormBitsA + rowNormBitsL >= 31 ) ? inNormBitsA + rowNormBitsL - 31 : 0;
57 
	/* first input element taking part in this row's dot product */
58 	const int16* inPtrL = inVecA + offsL;
59 
60 	count_t iL;
61 	int32 sumL = 0;
62 
63 	if( overflowBitsL == 0 ) /* raw dot product fits in int32 */
64 	{
65 		switch( ptrA->bitsPerValueE )
66 		{
67 			case 16:
68 			{
				/* one value per word: plain int16 x int16 accumulation */
69 				for( iL = sizeL; iL > 0; iL-- ) sumL += ( ( int32 )*rowPtrL++ * ( int32 )*inPtrL++ );
70 			}
71 			break;
72 
73 			#ifndef HW_TMS320C5x /* platforms that don't have int8 must use the 'default' implementation */
74 
75 			case 8:
76 			{
				/* two signed 8-bit values per word: the low byte pairs with the
				 * even input element, the high byte with the odd one
				 */
77 				const uint16* dpL = ( uint16* )rowPtrL;
				/* main loop: 8 values (4 words) per pass */
78 				for( iL = sizeL; iL >= 8; iL -= 8 )
79 				{
80 					sumL += ( ( int8 )  dpL[ 0 ]         * ( int32 )inPtrL[ 0 ] );
81 					sumL += ( ( int8 )( dpL[ 0 ] >>  8 ) * ( int32 )inPtrL[ 1 ] );
82 					sumL += ( ( int8 )  dpL[ 1 ]         * ( int32 )inPtrL[ 2 ] );
83 					sumL += ( ( int8 )( dpL[ 1 ] >>  8 ) * ( int32 )inPtrL[ 3 ] );
84 					sumL += ( ( int8 )  dpL[ 2 ]         * ( int32 )inPtrL[ 4 ] );
85 					sumL += ( ( int8 )( dpL[ 2 ] >>  8 ) * ( int32 )inPtrL[ 5 ] );
86 					sumL += ( ( int8 )  dpL[ 3 ]         * ( int32 )inPtrL[ 6 ] );
87 					sumL += ( ( int8 )( dpL[ 3 ] >>  8 ) * ( int32 )inPtrL[ 7 ] );
88 					dpL += 4;
89 					inPtrL += 8;
90 				}
				/* remaining complete words (2 values each) */
91 				for( ; iL >= 2; iL -= 2 )
92 				{
93 					sumL += ( ( int8 )  *dpL         * ( int32 )inPtrL[ 0 ] );
94 					sumL += ( ( int8 )( *dpL >>  8 ) * ( int32 )inPtrL[ 1 ] );
95 					dpL++;
96 					inPtrL += 2;
97 				}
				/* odd element count: last value sits in the low byte */
98 				if( iL > 0 )
99 				{
100 					sumL += ( ( int8 )*dpL++ * ( int32 )inPtrL[ 0 ] );
101 				}
102 			}
103 			break;
104 
105 			case 6:
106 			{
				/* eight 6-bit values are spread over three words. Each value is
				 * moved into the upper 6 bits of a byte (mask 0x00FC) so that
				 * the int8 cast sign-extends it; the products are therefore
				 * scaled by 4, which the '>> 2' on lSumL takes back.
				 */
107 				const uint16* dpL = ( uint16* )rowPtrL;
108 				for( iL = sizeL; iL >= 8; iL -= 8 )
109 				{
110 					int32 lSumL = 0;
111 					lSumL += ( ( int8 )     ( dpL[ 0 ] <<  2 )                                  * ( int32 )inPtrL[ 0 ] );
112 					lSumL += ( ( int8 ) (   ( dpL[ 0 ] >>  4 )                       & 0x00FC ) * ( int32 )inPtrL[ 1 ] );
113 					lSumL += ( ( int8 ) ( ( ( dpL[ 0 ] >> 10 ) | ( dpL[ 1 ] << 6 ) ) & 0x00FC ) * ( int32 )inPtrL[ 2 ] );
114 					lSumL += ( ( int8 ) (   ( dpL[ 1 ]       )                       & 0x00FC ) * ( int32 )inPtrL[ 3 ] );
115 					lSumL += ( ( int8 ) (   ( dpL[ 1 ] >>  6 )                       & 0x00FC ) * ( int32 )inPtrL[ 4 ] );
116 					lSumL += ( ( int8 ) ( ( ( dpL[ 1 ] >> 12 ) | ( dpL[ 2 ] << 4 ) ) & 0x00FC ) * ( int32 )inPtrL[ 5 ] );
117 					lSumL += ( ( int8 ) (   ( dpL[ 2 ] >>  2 )                       & 0x00FC ) * ( int32 )inPtrL[ 6 ] );
118 					lSumL += ( ( int8 ) (   ( dpL[ 2 ] >>  8 )                       & 0x00FC ) * ( int32 )inPtrL[ 7 ] );
119 					sumL += ( lSumL >> 2 );
120 					dpL += 3;
121 					inPtrL += 8;
122 				}
123 
				/* tail: the remaining iL (0..7) values, same extraction as above */
124 				{
125 					int32 lSumL = 0;
126 					if( iL > 0 ) lSumL += ( ( int8 )     ( dpL[ 0 ] <<  2 )                                  * ( int32 )inPtrL[ 0 ] );
127 					if( iL > 1 ) lSumL += ( ( int8 ) (   ( dpL[ 0 ] >>  4 )                       & 0x00FC ) * ( int32 )inPtrL[ 1 ] );
128 					if( iL > 2 ) lSumL += ( ( int8 ) ( ( ( dpL[ 0 ] >> 10 ) | ( dpL[ 1 ] << 6 ) ) & 0x00FC ) * ( int32 )inPtrL[ 2 ] );
129 					if( iL > 3 ) lSumL += ( ( int8 ) (   ( dpL[ 1 ]       )                       & 0x00FC ) * ( int32 )inPtrL[ 3 ] );
130 					if( iL > 4 ) lSumL += ( ( int8 ) (   ( dpL[ 1 ] >>  6 )                       & 0x00FC ) * ( int32 )inPtrL[ 4 ] );
131 					if( iL > 5 ) lSumL += ( ( int8 ) ( ( ( dpL[ 1 ] >> 12 ) | ( dpL[ 2 ] << 4 ) ) & 0x00FC ) * ( int32 )inPtrL[ 5 ] );
132 					if( iL > 6 ) lSumL += ( ( int8 ) (   ( dpL[ 2 ] >>  2 )                       & 0x00FC ) * ( int32 )inPtrL[ 6 ] );
133 					sumL += ( lSumL >> 2 );
134 				}
135 			}
136 			break;
137 
138 			case 5:
139 			{
				/* sixteen 5-bit values are spread over five words. Each value is
				 * moved into the upper 5 bits of a byte (mask 0x00F8) for sign
				 * extension via the int8 cast; the resulting scale of 8 is
				 * removed by the '>> 3' on lSumL.
				 */
140 				const uint16* dpL = ( uint16* )rowPtrL;
141 				for( iL = sizeL; iL >= 16; iL -= 16 )
142 				{
143 					int32 lSumL = 0;
144 					lSumL += ( ( int8 )     ( dpL[ 0 ] <<  3 )                                  * ( int32 )inPtrL[  0 ] );
145 					lSumL += ( ( int8 ) (   ( dpL[ 0 ] >>  2 )                       & 0x00F8 ) * ( int32 )inPtrL[  1 ] );
146 					lSumL += ( ( int8 ) (   ( dpL[ 0 ] >>  7 )                       & 0x00F8 ) * ( int32 )inPtrL[  2 ] );
147 					lSumL += ( ( int8 ) ( ( ( dpL[ 0 ] >> 12 ) | ( dpL[ 1 ] << 4 ) ) & 0x00F8 ) * ( int32 )inPtrL[  3 ] );
148 					lSumL += ( ( int8 ) (   ( dpL[ 1 ] >>  1 )                       & 0x00F8 ) * ( int32 )inPtrL[  4 ] );
149 					lSumL += ( ( int8 ) (   ( dpL[ 1 ] >>  6 )                       & 0x00F8 ) * ( int32 )inPtrL[  5 ] );
150 					lSumL += ( ( int8 ) ( ( ( dpL[ 1 ] >> 11 ) | ( dpL[ 2 ] << 5 ) ) & 0x00F8 ) * ( int32 )inPtrL[  6 ] );
151 					lSumL += ( ( int8 ) (   ( dpL[ 2 ]       )                       & 0x00F8 ) * ( int32 )inPtrL[  7 ] );
152 					lSumL += ( ( int8 ) (   ( dpL[ 2 ] >>  5 )                       & 0x00F8 ) * ( int32 )inPtrL[  8 ] );
153 					lSumL += ( ( int8 ) ( ( ( dpL[ 2 ] >> 10 ) | ( dpL[ 3 ] << 6 ) ) & 0x00F8 ) * ( int32 )inPtrL[  9 ] );
154 					lSumL += ( ( int8 ) (   ( dpL[ 3 ] <<  1 )                       & 0x00F8 ) * ( int32 )inPtrL[ 10 ] );
155 					lSumL += ( ( int8 ) (   ( dpL[ 3 ] >>  4 )                       & 0x00F8 ) * ( int32 )inPtrL[ 11 ] );
156 					lSumL += ( ( int8 ) ( ( ( dpL[ 3 ] >>  9 ) | ( dpL[ 4 ] << 7 ) ) & 0x00F8 ) * ( int32 )inPtrL[ 12 ] );
157 					lSumL += ( ( int8 ) (   ( dpL[ 4 ] <<  2 )                       & 0x00F8 ) * ( int32 )inPtrL[ 13 ] );
158 					lSumL += ( ( int8 ) (   ( dpL[ 4 ] >>  3 )                       & 0x00F8 ) * ( int32 )inPtrL[ 14 ] );
159 					lSumL += ( ( int8 ) (   ( dpL[ 4 ] >>  8 )                       & 0x00F8 ) * ( int32 )inPtrL[ 15 ] );
160 					sumL += ( lSumL >> 3 );
161 					dpL += 5;
162 					inPtrL += 16;
163 				}
164 
				/* tail: the remaining iL (0..15) values, same extraction as above */
165 				{
166 					int32 lSumL = 0;
167 					if( iL >  0 ) lSumL += ( ( int8 )     ( dpL[ 0 ] <<  3 )                                  * ( int32 )inPtrL[  0 ] );
168 					if( iL >  1 ) lSumL += ( ( int8 ) (   ( dpL[ 0 ] >>  2 )                       & 0x00F8 ) * ( int32 )inPtrL[  1 ] );
169 					if( iL >  2 ) lSumL += ( ( int8 ) (   ( dpL[ 0 ] >>  7 )                       & 0x00F8 ) * ( int32 )inPtrL[  2 ] );
170 					if( iL >  3 ) lSumL += ( ( int8 ) ( ( ( dpL[ 0 ] >> 12 ) | ( dpL[ 1 ] << 4 ) ) & 0x00F8 ) * ( int32 )inPtrL[  3 ] );
171 					if( iL >  4 ) lSumL += ( ( int8 ) (   ( dpL[ 1 ] >>  1 )                       & 0x00F8 ) * ( int32 )inPtrL[  4 ] );
172 					if( iL >  5 ) lSumL += ( ( int8 ) (   ( dpL[ 1 ] >>  6 )                       & 0x00F8 ) * ( int32 )inPtrL[  5 ] );
173 					if( iL >  6 ) lSumL += ( ( int8 ) ( ( ( dpL[ 1 ] >> 11 ) | ( dpL[ 2 ] << 5 ) ) & 0x00F8 ) * ( int32 )inPtrL[  6 ] );
174 					if( iL >  7 ) lSumL += ( ( int8 ) (   ( dpL[ 2 ]       )                       & 0x00F8 ) * ( int32 )inPtrL[  7 ] );
175 					if( iL >  8 ) lSumL += ( ( int8 ) (   ( dpL[ 2 ] >>  5 )                       & 0x00F8 ) * ( int32 )inPtrL[  8 ] );
176 					if( iL >  9 ) lSumL += ( ( int8 ) ( ( ( dpL[ 2 ] >> 10 ) | ( dpL[ 3 ] << 6 ) ) & 0x00F8 ) * ( int32 )inPtrL[  9 ] );
177 					if( iL > 10 ) lSumL += ( ( int8 ) (   ( dpL[ 3 ] <<  1 )                       & 0x00F8 ) * ( int32 )inPtrL[ 10 ] );
178 					if( iL > 11 ) lSumL += ( ( int8 ) (   ( dpL[ 3 ] >>  4 )                       & 0x00F8 ) * ( int32 )inPtrL[ 11 ] );
179 					if( iL > 12 ) lSumL += ( ( int8 ) ( ( ( dpL[ 3 ] >>  9 ) | ( dpL[ 4 ] << 7 ) ) & 0x00F8 ) * ( int32 )inPtrL[ 12 ] );
180 					if( iL > 13 ) lSumL += ( ( int8 ) (   ( dpL[ 4 ] <<  2 )                       & 0x00F8 ) * ( int32 )inPtrL[ 13 ] );
181 					if( iL > 14 ) lSumL += ( ( int8 ) (   ( dpL[ 4 ] >>  3 )                       & 0x00F8 ) * ( int32 )inPtrL[ 14 ] );
182 					sumL += ( lSumL >> 3 );
183 				}
184 			}
185 			break;
186 
187 			case 4:
188 			{
				/* four 4-bit values per word, lowest nibble first. Each nibble
				 * is placed in the upper half of a byte (mask 0xF0) for sign
				 * extension; the scale of 16 is removed by the '>> 4' on lSumL.
				 */
189 				for( iL = sizeL; iL >= 4; iL -= 4 )
190 				{
191 					uint16 v1L = *rowPtrL++;
192 					int32 lSumL = 0;
193 					lSumL += ( ( int8 )( ( v1L << 4 )        ) * ( int32 )inPtrL[ 0 ] );
194 					lSumL += ( ( int8 )( ( v1L      ) & 0xF0 ) * ( int32 )inPtrL[ 1 ] );
195 					lSumL += ( ( int8 )( ( v1L >> 4 ) & 0xF0 ) * ( int32 )inPtrL[ 2 ] );
196 					lSumL += ( ( int8 )( ( v1L >> 8 ) & 0xF0 ) * ( int32 )inPtrL[ 3 ] );
197 					inPtrL += 4;
198 					sumL += ( lSumL >> 4 );
199 				}
				/* tail: up to 3 leftover values; note that the next word is
				 * fetched unconditionally, i.e. also when iL is 0
				 */
200 				{
201 					uint16 v1L = *rowPtrL++;
202 					int32 lSumL = 0;
203 					if( iL-- > 0 ) lSumL += ( ( int8 )( ( v1L << 4 )        ) * ( int32 )inPtrL[ 0 ] );
204 					if( iL-- > 0 ) lSumL += ( ( int8 )( ( v1L      ) & 0xF0 ) * ( int32 )inPtrL[ 1 ] );
205 					if( iL-- > 0 ) lSumL += ( ( int8 )( ( v1L >> 4 ) & 0xF0 ) * ( int32 )inPtrL[ 2 ] );
206 					sumL += ( lSumL >> 4 );
207 				}
208 			}
209 			break;
210 
211 			#endif /*ifndef HW_TMS320C5x*/
212 
213 			/* The default case can process all bit sizes including those that are explicitly encoded above
214 			 * Use the default for all bit sizes when the platform cannot handle the int8 data type (e.g. HW_TMS320C5x)
215 			 */
216 			default:
217 			{
				/* Generic bit-stream reader:
				 *   bfL  - 32-bit window; a new word is loaded into its upper half
				 *          while the previous upper half moves to the lower half
				 *   adjL - number of unused low bits when a value is left-aligned
				 *          in 16 bits
				 *   mkL  - mask selecting the bitsL uppermost bits of a 16-bit word
				 *   srL  - right shift that brings the current value under mkL
				 * The value is extracted left-aligned so that the int16 cast
				 * sign-extends it; '>> adjL' on the product removes the scaling.
				 */
218 				uint32 bfL = ( ( uint32 )*rowPtrL++ ) << 16;
219 				uint32 bitsL = ptrA->bitsPerValueE;
220 				uint16 adjL = 16 - bitsL;
221 				uint32 mkL = ( ( 1 << bitsL ) - 1 ) << adjL;
222 				uint32 srL = bitsL;
223 				for( iL = 0; iL < sizeL; iL++ )
224 				{
					/* current value reaches beyond the window: pull in the next word */
225 					if( srL > 16 )
226 					{
227 						bfL = ( ( ( uint32 )*rowPtrL++ ) << 16 ) | ( bfL >> 16 );
228 						srL -= 16;
229 					}
230 					sumL += ( ( int16 )( ( bfL >> srL ) & mkL ) * ( int32 )inPtrL[ iL ] ) >> adjL;
231 					srL += bitsL;
232 				}
233 			}
234 		}
235 	}
236 	else /* raw dot product does not fit in int32 */
237 	{
		/* Same unpacking as above, but every partial sum is divided by
		 * 2^overflowBitsL before accumulation; roundL (half of that divisor)
		 * is added first so that the shift rounds.
		 * NOTE(review): unlike the first branch, the int8 based cases 8 and 4
		 * are not guarded by HW_TMS320C5x here, and there are no dedicated
		 * cases for 6 and 5 bits (they take the default path).
		 */
238 		int32 roundL = 1 << ( overflowBitsL - 1 );
239 		switch( ptrA->bitsPerValueE )
240 		{
241 			case 16:
242 			{
243 				for( iL = sizeL; iL > 0; iL-- ) sumL += ( ( ( int32 )*rowPtrL++ * ( int32 )*inPtrL++ ) + roundL ) >> overflowBitsL;
244 			}
245 			break;
246 
247 			case 8:
248 			{
				/* two values per word; the pair is summed before the scaled add */
249 				for( iL = sizeL; iL >= 2; iL -= 2 )
250 				{
251 					uint16 v1L = *rowPtrL++;
252 					int32 lSumL =   ( ( int8 )  v1L         * ( int32 )inPtrL[ 0 ] )
253 						          + ( ( int8 )( v1L >>  8 ) * ( int32 )inPtrL[ 1 ] );
254 					sumL += ( lSumL + roundL ) >> overflowBitsL;
255 					inPtrL += 2;
256 				}
257 				if( iL > 0 )
258 				{
259 					sumL += ( ( ( int8 )*rowPtrL++ * ( int32 )inPtrL[ 0 ] ) + roundL ) >> overflowBitsL;
260 				}
261 			}
262 			break;
263 
264 			case 4:
265 			{
				/* four nibbles per word; '>> 4' removes the sign-extension scale
				 * before the rounding shift by overflowBitsL is applied
				 */
266 				for( iL = sizeL; iL >= 4; iL -= 4 )
267 				{
268 					uint16 v1L = *rowPtrL++;
269 					int32 lSumL = 0;
270 					lSumL += ( ( int8 )( ( v1L << 4 )        ) * ( int32 )inPtrL[ 0 ] );
271 					lSumL += ( ( int8 )( ( v1L      ) & 0xF0 ) * ( int32 )inPtrL[ 1 ] );
272 					lSumL += ( ( int8 )( ( v1L >> 4 ) & 0xF0 ) * ( int32 )inPtrL[ 2 ] );
273 					lSumL += ( ( int8 )( ( v1L >> 8 ) & 0xF0 ) * ( int32 )inPtrL[ 3 ] );
274 					inPtrL += 4;
275 					sumL += ( ( lSumL >> 4 ) + roundL ) >> overflowBitsL;
276 				}
277 				{
278 					uint16 v1L = *rowPtrL++;
279 					int32 lSumL = 0;
280 					if( iL-- > 0 ) lSumL += ( ( int8 )( ( v1L << 4 )        ) * ( int32 )inPtrL[ 0 ] );
281 					if( iL-- > 0 ) lSumL += ( ( int8 )( ( v1L      ) & 0xF0 ) * ( int32 )inPtrL[ 1 ] );
282 					if( iL-- > 0 ) lSumL += ( ( int8 )( ( v1L >> 4 ) & 0xF0 ) * ( int32 )inPtrL[ 2 ] );
283 					sumL += ( ( lSumL >> 4 ) + roundL ) >> overflowBitsL;
284 				}
285 			}
286 			break;
287 
288 			default:
289 			{
				/* generic bit-stream reader as in the first branch; the rounding
				 * constant and the shift are combined with adjL so that a single
				 * shift removes both the alignment scale and the overflow bits
				 */
290 				uint32 bfL = ( ( uint32 )*rowPtrL++ ) << 16;
291 				uint32 bitsL = ptrA->bitsPerValueE;
292 				uint16 adjL = 16 - bitsL;
293 				uint32 mkL = ( ( 1 << bitsL ) - 1 ) << adjL;
294 				uint32 srL = bitsL;
295 				int32 lRoundL = roundL << adjL;
296 				int32 lAdjL = overflowBitsL + adjL;
297 				for( iL = 0; iL < sizeL; iL++ )
298 				{
299 					if( srL > 16 )
300 					{
301 						bfL = ( ( ( uint32 )*rowPtrL++ ) << 16 ) | ( bfL >> 16 );
302 						srL -= 16;
303 					}
304 					sumL += ( ( int16 )( ( bfL >> srL ) & mkL ) * ( int32 )inPtrL[ iL ] + lRoundL ) >> lAdjL;
305 					srL += bitsL;
306 				}
307 			}
308 		}
309 	}
310 
311 	/* compute result */
312 	{
313 		int32 resultManL;
314 		int32 resultExpL;
315 		int32 resultLogL;
		/* bbs_mulS32 is defined elsewhere; presumably it delivers the product
		 * sumL * factorManL as a mantissa/exponent pair - TODO confirm
		 */
316 		bbs_mulS32( sumL, factorManL, &resultManL, &resultExpL );
		/* apply the row's factor exponent and give back the bits removed above */
317 		resultExpL += factorExpL + overflowBitsL;
		/* left-align the mantissa: shift it up until the value reported by
		 * bbs_intLog2 for its magnitude would be 30, compensating in the exponent
		 */
318 		resultLogL = bbs_intLog2( resultManL > 0 ? resultManL : -resultManL );
319 		if( resultLogL < 30 )
320 		{
321 			resultManL <<= 30 - resultLogL;
322 			resultExpL  -= 30 - resultLogL;
323 		}
324 
		/* reduce the mantissa by 16 bits with rounding ( >> 15, + 1, >> 1 )
		 * and raise the exponent by 16 accordingly
		 */
325 		resultManL = ( ( resultManL >> 15 ) + 1 ) >> 1;
326 		resultExpL = resultExpL + 16;
327 
		/* pack: low 16 bits of the mantissa go to the upper half of the
		 * result, low 16 bits of the exponent to the lower half
		 */
328 		return ( ( resultManL & 0x0000FFFF ) << 16 ) | ( resultExpL & 0x0000FFFF );
329 	}
330 }
331 
332 /* ------------------------------------------------------------------------- */
333 
334 /* ========================================================================= */
335 /*                                                                           */
336 /* ---- \ghd{ constructor / destructor } ----------------------------------- */
337 /*                                                                           */
338 /* ========================================================================= */
339 
340 /* ------------------------------------------------------------------------- */
341 
bts_CompactMat_init(struct bbs_Context * cpA,struct bts_CompactMat * ptrA)342 void bts_CompactMat_init( struct bbs_Context* cpA,
343 					      struct bts_CompactMat* ptrA )
344 {
345 	ptrA->widthE = 0;
346 	ptrA->heightE = 0;
347 	ptrA->bitsPerValueE = 0;
348 	ptrA->wordsPerRowE = 0;
349 	ptrA->maxRowBitsE = 0;
350 	bbs_Int16Arr_init( cpA, &ptrA->cpsArrE );
351 	bbs_Int16Arr_init( cpA, &ptrA->expArrE );
352 
353 }
354 
355 /* ------------------------------------------------------------------------- */
356 
bts_CompactMat_exit(struct bbs_Context * cpA,struct bts_CompactMat * ptrA)357 void bts_CompactMat_exit( struct bbs_Context* cpA,
358 					    struct bts_CompactMat* ptrA )
359 {
360 	ptrA->widthE = 0;
361 	ptrA->heightE = 0;
362 	ptrA->bitsPerValueE = 0;
363 	ptrA->wordsPerRowE = 0;
364 	ptrA->maxRowBitsE = 0;
365 	bbs_Int16Arr_exit( cpA, &ptrA->cpsArrE );
366 	bbs_Int16Arr_exit( cpA, &ptrA->expArrE );
367 }
368 /* ------------------------------------------------------------------------- */
369 
370 /* ========================================================================= */
371 /*                                                                           */
372 /* ---- \ghd{ operators } -------------------------------------------------- */
373 /*                                                                           */
374 /* ========================================================================= */
375 
376 /* ------------------------------------------------------------------------- */
377 
378 /* ========================================================================= */
379 /*                                                                           */
380 /* ---- \ghd{ query functions } -------------------------------------------- */
381 /*                                                                           */
382 /* ========================================================================= */
383 
384 /* ------------------------------------------------------------------------- */
385 
386 /* ========================================================================= */
387 /*                                                                           */
388 /* ---- \ghd{ modify functions } ------------------------------------------- */
389 /*                                                                           */
390 /* ========================================================================= */
391 
392 /* ------------------------------------------------------------------------- */
393 
/** Allocates a zero-filled compact matrix.
 *
 *  Does nothing when the context already carries an error. Reports an error
 *  and leaves *ptrA untouched when bitsA is outside 2..16.
 *
 *  cpA         - context (error state, passed on to the array functions)
 *  ptrA        - matrix to set up
 *  widthA      - stored as widthE
 *  heightA     - number of rows; stored as heightE
 *  bitsA       - bits per packed value (2..16)
 *  maxRowSizeA - largest number of values a row has to hold
 *  mspA        - memory segment handed to bbs_Int16Arr_create
 */
void bts_CompactMat_create( struct bbs_Context* cpA,
							struct bts_CompactMat* ptrA,
							uint32 widthA,
							uint32 heightA,
							uint32 bitsA,
							uint32 maxRowSizeA,
							struct bbs_MemSeg* mspA )
{
	if( bbs_Context_error( cpA ) ) return;

	if( !( bitsA >= 2 && bitsA <= 16 ) )
	{
		bbs_ERROR0( "bts_CompactMat_create:\nbitsA must be between 2 and 16" );
		return;
	}

	ptrA->widthE        = widthA;
	ptrA->heightE       = heightA;
	ptrA->bitsPerValueE = bitsA;
	ptrA->maxRowBitsE   = 0;

	/* words per row: header + 1 (6 words) plus the packed payload,
	 * then padded by one word if the count is odd
	 */
	ptrA->wordsPerRowE  = 6 + ( ( maxRowSizeA * bitsA ) / ( 8 * sizeof( short ) ) );
	ptrA->wordsPerRowE += ptrA->wordsPerRowE & 1;

	/* compressed row storage, zero-initialized */
	bbs_Int16Arr_create( cpA, &ptrA->cpsArrE, heightA * ptrA->wordsPerRowE, mspA );
	bbs_Int16Arr_fill( cpA, &ptrA->cpsArrE, 0 );

	/* one exponent slot per row, zero-initialized */
	bbs_Int16Arr_create( cpA, &ptrA->expArrE, ptrA->heightE, mspA );
	bbs_Int16Arr_fill( cpA, &ptrA->expArrE, 0 );
}
420 
421 /* ------------------------------------------------------------------------- */
422 
bts_CompactMat_copy(struct bbs_Context * cpA,struct bts_CompactMat * ptrA,const struct bts_CompactMat * srcPtrA)423 void bts_CompactMat_copy( struct bbs_Context* cpA,
424 					      struct bts_CompactMat* ptrA,
425 						  const struct bts_CompactMat* srcPtrA )
426 {
427 	ptrA->widthE = srcPtrA->widthE;
428 	ptrA->heightE = srcPtrA->heightE;
429 	ptrA->bitsPerValueE = srcPtrA->bitsPerValueE;
430 	ptrA->wordsPerRowE = srcPtrA->wordsPerRowE;
431 	ptrA->maxRowBitsE = srcPtrA->maxRowBitsE;
432 	bbs_Int16Arr_copy( cpA, &ptrA->cpsArrE, &srcPtrA->cpsArrE );
433 	bbs_Int16Arr_size( cpA, &ptrA->expArrE, ptrA->heightE );
434 }
435 
436 /* ------------------------------------------------------------------------- */
437 
438 /* ========================================================================= */
439 /*                                                                           */
440 /* ---- \ghd{ I/O } -------------------------------------------------------- */
441 /*                                                                           */
442 /* ========================================================================= */
443 
444 /* ------------------------------------------------------------------------- */
445 
bts_CompactMat_memSize(struct bbs_Context * cpA,const struct bts_CompactMat * ptrA)446 uint32 bts_CompactMat_memSize( struct bbs_Context* cpA,
447 							 const struct bts_CompactMat *ptrA )
448 {
449 	return  bbs_SIZEOF16( uint32 )
450 		  + bbs_SIZEOF16( uint32 ) /* version */
451 		  + bbs_SIZEOF16( ptrA->widthE )
452 		  + bbs_SIZEOF16( ptrA->heightE )
453 		  + bbs_SIZEOF16( ptrA->bitsPerValueE )
454 		  + bbs_SIZEOF16( ptrA->wordsPerRowE )
455 		  + bbs_SIZEOF16( ptrA->maxRowBitsE )
456 		  + bbs_Int16Arr_memSize( cpA, &ptrA->cpsArrE );
457 }
458 
459 /* ------------------------------------------------------------------------- */
460 
bts_CompactMat_memWrite(struct bbs_Context * cpA,const struct bts_CompactMat * ptrA,uint16 * memPtrA)461 uint32 bts_CompactMat_memWrite( struct bbs_Context* cpA,
462 							  const struct bts_CompactMat* ptrA,
463 							  uint16* memPtrA )
464 {
465 	uint32 memSizeL = bts_CompactMat_memSize( cpA, ptrA );
466 	memPtrA += bbs_memWrite32( &memSizeL, memPtrA );
467 	memPtrA += bbs_memWriteUInt32( bts_COMPACT_MAT_VERSION, memPtrA );
468 	memPtrA += bbs_memWrite32( &ptrA->widthE, memPtrA );
469 	memPtrA += bbs_memWrite32( &ptrA->heightE, memPtrA );
470 	memPtrA += bbs_memWrite32( &ptrA->bitsPerValueE, memPtrA );
471 	memPtrA += bbs_memWrite32( &ptrA->wordsPerRowE, memPtrA );
472 	memPtrA += bbs_memWrite32( &ptrA->maxRowBitsE, memPtrA );
473 	memPtrA += bbs_Int16Arr_memWrite( cpA, &ptrA->cpsArrE, memPtrA );
474 	return memSizeL;
475 }
476 
477 /* ------------------------------------------------------------------------- */
478 
bts_CompactMat_memRead(struct bbs_Context * cpA,struct bts_CompactMat * ptrA,const uint16 * memPtrA,struct bbs_MemSeg * mspA)479 uint32 bts_CompactMat_memRead( struct bbs_Context* cpA,
480 							 struct bts_CompactMat* ptrA,
481 							 const uint16* memPtrA,
482 				             struct bbs_MemSeg* mspA )
483 {
484 	uint32 memSizeL, versionL;
485 	if( bbs_Context_error( cpA ) ) return 0;
486 	memPtrA += bbs_memRead32( &memSizeL, memPtrA );
487 	memPtrA += bbs_memReadVersion32( cpA, &versionL, bts_COMPACT_MAT_VERSION, memPtrA );
488 	memPtrA += bbs_memRead32( &ptrA->widthE, memPtrA );
489 	memPtrA += bbs_memRead32( &ptrA->heightE, memPtrA );
490 	memPtrA += bbs_memRead32( &ptrA->bitsPerValueE, memPtrA );
491 	memPtrA += bbs_memRead32( &ptrA->wordsPerRowE, memPtrA );
492 	memPtrA += bbs_memRead32( &ptrA->maxRowBitsE, memPtrA );
493 	memPtrA += bbs_Int16Arr_memRead( cpA, &ptrA->cpsArrE, memPtrA, mspA );
494 
495 	if( memSizeL != bts_CompactMat_memSize( cpA, ptrA ) )
496 	{
497 		bbs_ERR0( bbs_ERR_CORRUPT_DATA, "uint32 bts_CompactMat_memRead( const struct bts_CompactMat* ptrA, const void* memPtrA ):\n"
498                   "size mismatch" );
499 	}
500 
501 	bbs_Int16Arr_create( cpA, &ptrA->expArrE, ptrA->heightE, mspA );
502 	bbs_Int16Arr_fill( cpA, &ptrA->expArrE, 0 );
503 
504 	return memSizeL;
505 }
506 
507 /* ------------------------------------------------------------------------- */
508 
509 /* ========================================================================= */
510 /*                                                                           */
511 /* ---- \ghd{ exec functions } --------------------------------------------- */
512 /*                                                                           */
513 /* ========================================================================= */
514 
515 /* ------------------------------------------------------------------------- */
516 
/** Maps inVecA through the matrix: outVecA[ i ] receives the dot product of
 *  inVecA with row i for all heightE rows.
 *
 *  Every row result is delivered by bts_CompactMat_fltDotPrdRow as a 16-bit
 *  mantissa with its own 16-bit exponent. The exponents are kept in expArrE,
 *  their maximum is determined, and all mantissas are then shifted right
 *  (with rounding) so that they share that maximum exponent.
 *
 *  cpA        - context handed on to bts_CompactMat_fltDotPrdRow
 *  ptrA       - matrix; declared const, but its expArrE content is overwritten
 *  inVecA     - input vector; widthE elements are passed to bbs_vecNorm16
 *  outVecA    - receives heightE mantissas
 *  outExpPtrA - if not NULL, receives the common exponent
 */
void bts_CompactMat_map( struct bbs_Context* cpA,
						 const struct bts_CompactMat* ptrA,
						 const int16* inVecA,
						 int16* outVecA,
						 int16* outExpPtrA )
{
	uint32 inNormBitsL = bbs_intLog2( bbs_vecNorm16( inVecA, ptrA->widthE ) ) + 1;
	uint32 iL;

	/* expArrE serves as per-row scratch storage, hence the cast from const */
	int16* expArrL = ( ( struct bts_CompactMat* )ptrA )->expArrE.arrPtrE;
	int16 maxExpL = -32767;

	/* pass 1: row results; split into mantissa (upper half) and exponent (lower half) */
	for( iL = 0; iL < ptrA->heightE; iL++ )
	{
		int32 fltL = bts_CompactMat_fltDotPrdRow( cpA, ( struct bts_CompactMat* )ptrA, inVecA, inNormBitsL, iL );
		outVecA[ iL ] = fltL >> 16;
		expArrL[ iL ] = fltL & 0x0000FFFF;

		maxExpL = ( expArrL[ iL ] > maxExpL ) ? expArrL[ iL ] : maxExpL;
	}

	if( outExpPtrA != NULL ) *outExpPtrA = maxExpL;

	/* pass 2: bring all mantissas to the common exponent maxExpL */
	for( iL = 0; iL < ptrA->heightE; iL++ )
	{
		int32 shrL = maxExpL - expArrL[ iL ];
		if( shrL > 16 )
		{
			/* A 16-bit mantissa shifted right by more than 16 bits rounds to 0.
			 * shrL is a difference of two int16 exponents and may exceed the
			 * width of int, for which the shift below would be undefined, so
			 * this case is handled explicitly.
			 */
			outVecA[ iL ] = 0;
		}
		else if( shrL > 0 )
		{
			/* shift right by shrL with rounding */
			outVecA[ iL ] = ( ( outVecA[ iL ] >> ( shrL - 1 ) ) + 1 ) >> 1;
		}
	}
}
549 
550 /* ------------------------------------------------------------------------- */
551 
552 /* ========================================================================= */
553 
554