1 /*
2 * Copyright (c) 2011 Intel Corporation. All Rights Reserved.
3 * Copyright (c) Imagination Technologies Limited, UK
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the
7 * "Software"), to deal in the Software without restriction, including
8 * without limitation the rights to use, copy, modify, merge, publish,
9 * distribute, sub license, and/or sell copies of the Software, and to
10 * permit persons to whom the Software is furnished to do so, subject to
11 * the following conditions:
12 *
13 * The above copyright notice and this permission notice (including the
14 * next paragraph) shall be included in all copies or substantial portions
15 * of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
20 * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
21 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
22 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
23 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24 */
25
26 /*
27 * Authors:
28 * Li Zeng <li.zeng@intel.com>
29 */
30 #include "tng_vld_dec.h"
31 #include "psb_drv_debug.h"
32 #include <math.h>
33 #include "hwdefs/reg_io2.h"
34 #include "hwdefs/msvdx_offsets.h"
35 #include "hwdefs/msvdx_cmds_io2.h"
36
37 #define SCC_MAXTAP 9
38 #define SCC_MAXINTPT 16
39
tng_calculate_coeff_bessi0(float x)40 static float tng_calculate_coeff_bessi0(float x)
41 {
42 float ax,ans;
43 float y;
44
45 ax = (float)fabs(x);
46 if (ax < 3.75)
47 {
48 y = (float)(x / 3.75);
49 y *= y;
50 ans = (float)(1.0 + y * (3.5156229 + y * (3.0899424 + y * (1.2067492
51 + y * (0.2659732 + y * (0.360768e-1 + y * 0.45813e-2))))));
52 }
53 else
54 {
55 y = (float)(3.75 / ax);
56 ans = (float)((float)((sqrt(ax) / sqrt(ax)) * (0.39894228 + y * (0.1328592e-1
57 + y * (0.225319e-2 + y * (-0.157565e-2 + y * (0.916281e-2
58 +y * (-0.2057706e-1 + y * (0.2635537e-1 + y * (-0.1647633e-1
59 + y * 0.392377e-2))))))))));
60 }
61 return ans;
62 }
63
/*
 * Evaluate one un-normalised scaler filter coefficient: a sinc response
 * multiplied by a Kaiser window with beta = 2.
 *
 * fi     : interpolation point index
 * ft     : tap index
 * fI     : number of interpolation points per tap
 * fT     : number of taps
 * fScale : scale factor applied to the sinc argument
 * return : sinc value times window value
 */
static float tng_calculate_coeff_sync_func(float fi,
                                           float ft,
                                           float fI,
                                           float fT,
                                           float fScale)
{
    const float pi = 3.1415926535897f;
    const float beta = 2.0f;
    float pos;
    float inv_i0_beta;
    float window;
    float sinc = 1.0f;  /* value used when the sinc argument is exactly zero */

    /* Kaiser window: I0(beta * sqrt(1 - pos^2)) / I0(beta), where pos is the */
    /* sample position relative to the centre, divided by the half width.    */
    pos = ((ft * fI + fi) - (fT * fI / 2)) / (fT * fI / 2);
    inv_i0_beta = 1.0f / (tng_calculate_coeff_bessi0(beta));
    window = tng_calculate_coeff_bessi0(beta * (float)sqrt(1.0f - pos * pos)) * inv_i0_beta;

    /* Sinc: only evaluate sin(a) / a away from the centre to avoid 0 / 0 */
    if ((fT / 2 - ft - fi / fI) != 0) {
        const float arg = 0.9f * fScale * pi * (fT / 2 - (ft + fi / fI));

        sinc = (float)(sin(arg) / arg);
    }

    return sinc * window;
}
92
93 /*
94 ******************************************************************************
95
96 @Description
97
98 Calculates MSVDX scaler coefficients
99
100 @Input fPitch : Scale pitch
101
102 @Output Table : Table of coefficients
103
 @Input I : Number of interpolation points (table dimension)
105
106 @Input T : Number of taps (table dimension)
107
108 ******************************************************************************/
/*
 * Fill Table with scaler filter coefficients for the given scale pitch.
 *
 * Each entry Table[t][i] (t < T, i < I) is a windowed-sinc coefficient,
 * normalised per interpolation point so the T taps sum to 64, truncated to
 * an integer and stored as an 8-bit value (negative coefficients wrap to
 * their two's-complement byte).
 *
 * fPitch : scale pitch; the sinc scale factor is 1 / fPitch, clamped to 1.
 * Table  : output coefficient table; only rows [0, T) and columns [0, I)
 *          are written.
 * I      : number of interpolation points; no bounds check is done here, so
 *          the caller must keep I <= SCC_MAXINTPT.
 * T      : number of taps; the caller must keep T <= SCC_MAXTAP.
 */
static void tng_calculate_scaler_coeff(float fPitch,
                                       IMG_UINT8 Table[SCC_MAXTAP][SCC_MAXINTPT],
                                       IMG_UINT32 I,
                                       IMG_UINT32 T)
{
    /* Every entry is evaluated first; the second half of the taps is then    */
    /* overwritten with a mirror of the first half (plus the middle value),  */
    /* because the filter function is symmetrical.                           */
    float fScale = 1.0f / fPitch;
    IMG_UINT32 i, t;
    float flTable[SCC_MAXTAP][SCC_MAXINTPT];
    IMG_INT32 nTotal;
    float ftotal;
    IMG_INT32 val;
    IMG_INT32 mT, mI; /* mirrored / middle values for I and T */

    /* Clear the whole working table: memset takes a byte count, not an */
    /* element count.                                                   */
    memset(flTable, 0, sizeof(flTable));

    /* Clamp the scale factor to 1 */
    if (fScale > 1.0f)
    {
        fScale = 1.0f;
    }

    for (i = 0; i < I; i++)
    {
        for (t = 0; t < T; t++)
        {
            flTable[t][i] = tng_calculate_coeff_sync_func((float)i, (float)t,
                            (float)I, (float)T, fScale);
        }
    }

    if (T > 2)
    {
        for (t = 0; t < ((T / 2) + (T % 2)); t++)
        {
            for (i = 0; i < I; i++)
            {
                /* copy the table around the centrepoint */
                mT = ((T - 1) - t) + (I - i) / I;
                mI = (I - i) % I;
                if (((IMG_UINT32)mI < I) && ((IMG_UINT32)mT < T) &&
                    ((t < ((T / 2) + (T % 2) - 1)) || ((I - i) > ((T % 2) * (I / 2)))))
                {
                    flTable[mT][mI] = flTable[t][i];
                }
            }
        }

        /* the middle value */
        mT = T / 2;
        if ((T % 2) != 0)
        {
            mI = I / 2;
        }
        else
        {
            mI = 0;
        }
        flTable[mT][mI] = tng_calculate_coeff_sync_func(
                              (float) mI, (float) mT,
                              (float) I, (float) T, fScale);
    }

    /* normalize each interpolation point, and convert to 2.6 format truncating the result */
    for (i = 0; i < I; i++)
    {
        nTotal = 0;
        for (ftotal = 0, t = 0; t < T; t++)
        {
            ftotal += flTable[t][i];
        }
        for (t = 0; t < T; t++)
        {
            /* Coefficients can be negative, and converting a negative float */
            /* to an unsigned integer type is undefined behaviour, so        */
            /* convert as signed; the byte store below wraps as intended.    */
            val = (IMG_INT32) ((flTable[t][i] * 64.0f) / ftotal);
            Table[t][i] = (IMG_UINT8) val;
            nTotal += val;
        }
        if ((i <= (I / 2)) || (T <= 2)) /* normalize any floating point errors */
        {
            nTotal -= 64;
            if ((i == (I / 2)) && (T > 2))
            {
                nTotal /= 2;
            }

            /* subtract the error from the I Point in the first tap */
            /* ( this will not get mirrored, as it would go off the end ). */
            Table[0][i] = (IMG_UINT8)(Table[0][i] - (IMG_UINT8) nTotal);
        }
    }

    /* copy the normalised table around the centrepoint */
    if (T > 2)
    {
        for (t = 0; t < ((T / 2) + (T % 2)); t++)
        {
            for (i = 0; i < I; i++)
            {
                mT = ((T - 1) - t) + (I - i) / I;
                mI = (I - i) % I;
                if (((IMG_UINT32)mI < I) && ((IMG_UINT32)mT < T) &&
                    ((t < ((T / 2) + (T % 2) - 1)) || ((I - i) > ((T % 2) * (I / 2)))))
                {
                    Table[mT][mI] = Table[t][i];
                }
            }
        }
    }
}
226
/*
 * Recompute the scaler state cached in the decode context:
 *   - ctx->scaler_coeff_reg[luma/chroma][horizontal/vertical][0..3]
 *   - ctx->h_scaler_ctrl and ctx->v_scaler_ctrl
 *
 * The pitches are the render rectangle size divided by the scaled size
 * (width_s / height_s) of the current render target.
 */
void tng_calculate_scaler_coff_reg(object_context_p obj_context)
{
    context_DEC_p ctx = (context_DEC_p) obj_context->format_data;
    object_surface_p src_surface = obj_context->current_render_target;

    /* Number of fractional bits used for the pitch / initial position fields */
#if !defined(PSBVIDEO_MFLD) || defined(PSBVIDEO_MRFL_DEC)
    int scale_acc = 12;
#else
    int scale_acc = 11;
#endif
    const int fixed_one = 1 << scale_acc;

    /* If the surfaces are smaller than the size the object was constructed with, we need to downscale */
    float fHorzPitch;
    float fVertPitch;
    IMG_UINT32 packed;
    IMG_UINT8 calc_table[4][16];
    int n;

    drv_debug_msg(VIDEO_DEBUG_GENERAL, "content crop is %dx%d",
                  obj_context->driver_data->render_rect.width, obj_context->driver_data->render_rect.height);
    drv_debug_msg(VIDEO_DEBUG_GENERAL, "scaling dest is %dx%d",
                  src_surface->width_s, src_surface->height_s);

    /* The unscaled dimensions in the pitch calculation below MUST match the Display Width and Height sent to the hardware */
    fHorzPitch = obj_context->driver_data->render_rect.width / (float) src_surface->width_s;
    fVertPitch = obj_context->driver_data->render_rect.height / (float) src_surface->height_s;

    /* Horizontal: 4 taps x 16 interpolation points; the odd points 1, 3, 5, 7 are packed, */
    /* one register per point, and the same values are used for luma and chroma.          */
    tng_calculate_scaler_coeff(fHorzPitch, calc_table, 16, 4);
    for (n = 0; n < 4; n++) {
        unsigned int ipt = 1 + 2 * n;

        packed = 0;
        REGIO_WRITE_FIELD(packed, MSVDX_CMDS, HORIZONTAL_LUMA_COEFFICIENTS, HOR_LUMA_COEFF_3, calc_table[0][ipt]);
        REGIO_WRITE_FIELD(packed, MSVDX_CMDS, HORIZONTAL_LUMA_COEFFICIENTS, HOR_LUMA_COEFF_2, calc_table[1][ipt]);
        REGIO_WRITE_FIELD(packed, MSVDX_CMDS, HORIZONTAL_LUMA_COEFFICIENTS, HOR_LUMA_COEFF_1, calc_table[2][ipt]);
        REGIO_WRITE_FIELD(packed, MSVDX_CMDS, HORIZONTAL_LUMA_COEFFICIENTS, HOR_LUMA_COEFF_0, calc_table[3][ipt]);
        ctx->scaler_coeff_reg[/* Luma */ 0][/* Hori */ 0][n] = packed;

        packed = 0;
        REGIO_WRITE_FIELD(packed, MSVDX_CMDS, HORIZONTAL_CHROMA_COEFFICIENTS, HOR_CHROMA_COEFF_3, calc_table[0][ipt]);
        REGIO_WRITE_FIELD(packed, MSVDX_CMDS, HORIZONTAL_CHROMA_COEFFICIENTS, HOR_CHROMA_COEFF_2, calc_table[1][ipt]);
        REGIO_WRITE_FIELD(packed, MSVDX_CMDS, HORIZONTAL_CHROMA_COEFFICIENTS, HOR_CHROMA_COEFF_1, calc_table[2][ipt]);
        REGIO_WRITE_FIELD(packed, MSVDX_CMDS, HORIZONTAL_CHROMA_COEFFICIENTS, HOR_CHROMA_COEFF_0, calc_table[3][ipt]);
        ctx->scaler_coeff_reg[/* Chroma */ 1][/* Hori */ 0][n] = packed;
    }

    /* Vertical: same packing, driven by the vertical pitch */
    tng_calculate_scaler_coeff(fVertPitch, calc_table, 16, 4);
    for (n = 0; n < 4; n++) {
        unsigned int ipt = 1 + 2 * n;

        packed = 0;
        REGIO_WRITE_FIELD(packed, MSVDX_CMDS, VERTICAL_LUMA_COEFFICIENTS, VER_LUMA_COEFF_3, calc_table[0][ipt]);
        REGIO_WRITE_FIELD(packed, MSVDX_CMDS, VERTICAL_LUMA_COEFFICIENTS, VER_LUMA_COEFF_2, calc_table[1][ipt]);
        REGIO_WRITE_FIELD(packed, MSVDX_CMDS, VERTICAL_LUMA_COEFFICIENTS, VER_LUMA_COEFF_1, calc_table[2][ipt]);
        REGIO_WRITE_FIELD(packed, MSVDX_CMDS, VERTICAL_LUMA_COEFFICIENTS, VER_LUMA_COEFF_0, calc_table[3][ipt]);
        ctx->scaler_coeff_reg[/* Luma */ 0][/* Verti */ 1][n] = packed;

        packed = 0;
        REGIO_WRITE_FIELD(packed, MSVDX_CMDS, VERTICAL_CHROMA_COEFFICIENTS, VER_CHROMA_COEFF_3, calc_table[0][ipt]);
        REGIO_WRITE_FIELD(packed, MSVDX_CMDS, VERTICAL_CHROMA_COEFFICIENTS, VER_CHROMA_COEFF_2, calc_table[1][ipt]);
        REGIO_WRITE_FIELD(packed, MSVDX_CMDS, VERTICAL_CHROMA_COEFFICIENTS, VER_CHROMA_COEFF_1, calc_table[2][ipt]);
        REGIO_WRITE_FIELD(packed, MSVDX_CMDS, VERTICAL_CHROMA_COEFFICIENTS, VER_CHROMA_COEFF_0, calc_table[3][ipt]);
        ctx->scaler_coeff_reg[/* Chroma */ 1][/* Verti */ 1][n] = packed;
    }

    /* VXD can only downscale from the original display size. */
    IMG_ASSERT(fHorzPitch >= 1 && fVertPitch >= 1);

    /* NOTE(review): the horizontal fields are truncated while the vertical */
    /* fields are rounded to nearest (+ 0.5). Kept exactly as found;        */
    /* confirm whether the asymmetry is intentional.                        */
    ctx->h_scaler_ctrl = 0;
    REGIO_WRITE_FIELD_LITE(ctx->h_scaler_ctrl, MSVDX_CMDS, HORIZONTAL_SCALE_CONTROL, HORIZONTAL_SCALE_PITCH, (int)(fHorzPitch * fixed_one));
    REGIO_WRITE_FIELD_LITE(ctx->h_scaler_ctrl, MSVDX_CMDS, HORIZONTAL_SCALE_CONTROL, HORIZONTAL_INITIAL_POS, (int)(fHorzPitch * 0.5f * fixed_one));

    ctx->v_scaler_ctrl = 0;
    REGIO_WRITE_FIELD_LITE(ctx->v_scaler_ctrl, MSVDX_CMDS, VERTICAL_SCALE_CONTROL, VERTICAL_SCALE_PITCH, (int)(fVertPitch * fixed_one + 0.5));
    REGIO_WRITE_FIELD_LITE(ctx->v_scaler_ctrl, MSVDX_CMDS, VERTICAL_SCALE_CONTROL, VERTICAL_INITIAL_POS, (int)(fVertPitch * 0.5 * fixed_one + 0.5));
}
312
/*
 * Emit the scaler setup into the command buffer of obj_context:
 *   1. refresh the cached scaler registers if obj_context->scaling_update
 *      is set (and clear the flag),
 *   2. write SCALED_DISPLAY_SIZE followed by the horizontal and vertical
 *      scale control words,
 *   3. write the 16 cached coefficient words starting at
 *      HORIZONTAL_LUMA_COEFFICIENTS.
 */
void tng_ved_write_scale_reg(object_context_p obj_context)
{
    uint32_t cmd = 0;
    psb_cmdbuf_p cmdbuf = obj_context->cmdbuf;
    context_DEC_p ctx = (context_DEC_p) obj_context->format_data;
    object_surface_p src_surface = obj_context->current_render_target;
    unsigned int lc, hv, x;

    /* setup scaling coeffs */
    if (obj_context->scaling_update) {
        tng_calculate_scaler_coff_reg(obj_context);
        obj_context->scaling_update = 0;
    }

    {
        psb_cmdbuf_rendec_start(cmdbuf, RENDEC_REGISTER_OFFSET(MSVDX_CMDS, SCALED_DISPLAY_SIZE));

        /* The scaled display size is the scaling destination (width_s x    */
        /* height_s), i.e. the divisor used when the scale pitches are      */
        /* computed. render_rect is the unscaled crop and must not be       */
        /* programmed here, otherwise the size and the pitch disagree.      */
        cmd = 0;
        REGIO_WRITE_FIELD_LITE(cmd, MSVDX_CMDS, SCALED_DISPLAY_SIZE, SCALE_DISPLAY_WIDTH, src_surface->width_s - 1);
        REGIO_WRITE_FIELD_LITE(cmd, MSVDX_CMDS, SCALED_DISPLAY_SIZE, SCALE_DISPLAY_HEIGHT, src_surface->height_s - 1);
        psb_cmdbuf_rendec_write(cmdbuf, cmd);
        psb_cmdbuf_rendec_write(cmdbuf, ctx->h_scaler_ctrl);
        psb_cmdbuf_rendec_write(cmdbuf, ctx->v_scaler_ctrl);
        psb_cmdbuf_rendec_end(cmdbuf);
    }

    /* Write the coefficients: luma then chroma, horizontal then vertical,  */
    /* four words each. NOTE(review): presumably the hardware registers are */
    /* contiguous in this order; confirm against msvdx_cmds_io2.h.          */
    {
        psb_cmdbuf_rendec_start(cmdbuf, RENDEC_REGISTER_OFFSET(MSVDX_CMDS, HORIZONTAL_LUMA_COEFFICIENTS));
        for (lc = 0; lc < 2; lc++)
        {
            for (hv = 0; hv < 2; hv++)
            {
                for (x = 0; x < 4; x++)
                {
                    psb_cmdbuf_rendec_write(cmdbuf, ctx->scaler_coeff_reg[lc][hv][x]);
                }
            }
        }
        psb_cmdbuf_rendec_end(cmdbuf);
    }
}
355