1 /*
2  * Copyright (c) 2011 Intel Corporation. All Rights Reserved.
3  * Copyright (c) Imagination Technologies Limited, UK
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the
7  * "Software"), to deal in the Software without restriction, including
8  * without limitation the rights to use, copy, modify, merge, publish,
9  * distribute, sub license, and/or sell copies of the Software, and to
10  * permit persons to whom the Software is furnished to do so, subject to
11  * the following conditions:
12  *
13  * The above copyright notice and this permission notice (including the
14  * next paragraph) shall be included in all copies or substantial portions
15  * of the Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
20  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
21  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
22  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
23  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24  */
25 
26 /*
27  * Authors:
28  *    Li Zeng <li.zeng@intel.com>
29  */
30 #include "tng_vld_dec.h"
31 #include "psb_drv_debug.h"
32 #include <math.h>
33 #include "hwdefs/reg_io2.h"
34 #include "hwdefs/msvdx_offsets.h"
35 #include "hwdefs/msvdx_cmds_io2.h"
36 
37 #define SCC_MAXTAP      9
38 #define SCC_MAXINTPT    16
39 
tng_calculate_coeff_bessi0(float x)40 static float tng_calculate_coeff_bessi0(float x)
41 {
42     float ax,ans;
43     float y;
44 
45     ax = (float)fabs(x);
46     if (ax < 3.75)
47     {
48         y = (float)(x / 3.75);
49         y *= y;
50         ans = (float)(1.0 + y * (3.5156229 + y * (3.0899424 + y * (1.2067492
51             + y * (0.2659732 + y * (0.360768e-1 + y * 0.45813e-2))))));
52     }
53     else
54     {
55         y = (float)(3.75 / ax);
56         ans = (float)((float)((sqrt(ax) / sqrt(ax)) * (0.39894228 + y * (0.1328592e-1
57             + y * (0.225319e-2 + y * (-0.157565e-2 + y * (0.916281e-2
58             +y * (-0.2057706e-1 + y * (0.2635537e-1 + y * (-0.1647633e-1
59             + y * 0.392377e-2))))))))));
60     }
61     return ans;
62 }
63 
/*
 * Evaluate one filter coefficient: a sinc sample multiplied by a Kaiser
 * window (beta = 2).
 *
 * fi     : interpolation point index within the tap
 * ft     : tap index
 * fI     : number of interpolation points per tap
 * fT     : number of taps
 * fScale : scale factor applied to the sinc argument
 * return : window value times sinc value for position (ft + fi / fI)
 */
static float tng_calculate_coeff_sync_func(float fi,
                                           float ft,
                                           float fI,
                                           float fT,
                                           float fScale)
{
    const float kPi = 3.1415926535897f;
    const float kaiserBeta = 2.0f;
    float winPos, invI0Beta, window, sincArg;

    /* Kaiser window: position normalised to [-1, 1) around the filter centre */
    winPos = ((ft * fI + fi) - (fT * fI / 2)) / (fT * fI / 2);
    invI0Beta = 1.0f / (tng_calculate_coeff_bessi0(kaiserBeta));
    window = tng_calculate_coeff_bessi0(kaiserBeta * (float)sqrt(1.0f - winPos * winPos)) * invI0Beta;

    /* Sinc function: exactly at the centre sin(x)/x is taken as 1 */
    if ((fT / 2 - ft - fi / fI) == 0)
    {
        return window;
    }

    sincArg = 0.9f * fScale * kPi * (fT / 2 - (ft + fi / fI));
    return (float)(sin(sincArg) / sincArg) * window;
}
92 
93 /*
94 ******************************************************************************
95 
96  @Description
97 
98  Calculates MSVDX scaler coefficients
99 
100  @Input     fPitch      :   Scale pitch
101 
102  @Output    Table       :  Table of coefficients
103 
104  @Input     I           :   Number of intpt? (   table dimension)
105 
106  @Input     T           :   Number of taps      (table dimension)
107 
108 ******************************************************************************/
tng_calculate_scaler_coeff(float fPitch,IMG_UINT8 Table[SCC_MAXTAP][SCC_MAXINTPT],IMG_UINT32 I,IMG_UINT32 T)109 static void tng_calculate_scaler_coeff(    float   fPitch,
110                                                     IMG_UINT8 Table[SCC_MAXTAP][SCC_MAXINTPT],
111                                                     IMG_UINT32 I,
112                                                     IMG_UINT32 T)
113 {
114     /* Due to the nature of the function we will only ever want to calculate the first half of the    */
115     /* taps and the middle one (is this really a tap ?) as the seconda half are derived from the    */
116     /* first half as the function is symetrical.                                                    */
117     float fScale = 1.0f / fPitch;
118     IMG_UINT32 i, t;
119     float flTable[SCC_MAXTAP][SCC_MAXINTPT];
120     IMG_INT32 nTotal;
121     float ftotal;
122     IMG_INT32 val;
123     IMG_INT32 mT, mI; /* mirrored / middle Values for I and T */
124 
125     memset(flTable, 0.0, SCC_MAXTAP * SCC_MAXINTPT);
126 
127     if (fScale > 1.0f)
128     {
129         fScale = 1.0f;
130     }
131 
132     for (i = 0; i < I; i++)
133     {
134         for (t = 0; t < T; t++)
135         {
136             flTable[t][i] = 0.0;
137         }
138     }
139 
140     for (i = 0;i < I; i++)
141     {
142         for (t = 0; t < T; t++)
143         {
144             flTable[t][i] = tng_calculate_coeff_sync_func((float)i, (float)t,
145                                                             (float)I, (float)T, fScale);
146         }
147     }
148 
149     if (T>2)
150     {
151         for (t = 0; t < ((T / 2) + (T % 2)); t++)
152         {
153             for (i=0 ; i < I; i++)
154             {
155                 /* copy the table around the centrepoint */
156                 mT = ((T - 1) - t) + (I - i) / I;
157                 mI = (I - i) % I;
158                 if (((IMG_UINT32)mI < I) && ((IMG_UINT32)mT < T) &&
159                     ((t < ((T / 2) + (T % 2) - 1)) || ((I - i) > ((T % 2) * (I / 2)))))
160                 {
161                     flTable[mT][mI] = flTable[t][i];
162                 }
163             }
164         }
165 
166         /* the middle value */
167         mT = T / 2;
168         if ((T % 2) != 0)
169         {
170             mI = I/2;
171         }
172         else
173         {
174             mI = 0;
175         }
176         flTable[mT][mI] = tng_calculate_coeff_sync_func(
177             (float) mI, (float) mT,
178             (float) I, (float) T, fScale);
179     }
180 
181     /* normalize this interpolation point, and convert to 2.6 format trucating the result    */
182     for (i = 0; i < I; i++)
183     {
184         nTotal = 0;
185         for (ftotal = 0,t = 0; t < T; t++)
186         {
187             ftotal += flTable[t][i];
188         }
189         for (t = 0; t < T; t++)
190         {
191             val = (IMG_UINT32) ((flTable[t][i] * 64.0f) / ftotal);
192             Table[t][i] = (IMG_UINT8) val;
193             nTotal += val;
194         }
195         if ((i <= (I / 2)) || (T <= 2)) /* normalize any floating point errors */
196         {
197             nTotal -= 64;
198             if ((i == (I / 2)) && (T > 2))
199             {
200                 nTotal /= 2;
201             }
202 
203             /* subtract the error from the I Point in the first tap */
204             /* ( this will not get mirrored, as it would go off the end ). */
205             Table[0][i] = (IMG_UINT8)(Table[0][i] - (IMG_UINT8) nTotal);
206         }
207     }
208 
209     /* copy the normalised table around the centrepoint */
210     if (T > 2)
211     {
212         for ( t = 0; t < ((T / 2) + (T % 2)); t++)
213         {
214             for (i = 0; i < I; i++)
215             {
216                 mT = ((T - 1) - t) + (I - i) / I;
217                 mI = (I - i) % I;
218                 if (((IMG_UINT32)mI < I) && ((IMG_UINT32)mT < T) && ((t < ((T / 2) + (T % 2) - 1)) || ((I - i) > ((T % 2) * (I / 2)))))
219                 {
220                     Table[mT][mI] = Table[t][i];
221                 }
222             }
223         }
224     }
225 }
226 
/*
 * Recompute the scaler state cached in the decode context:
 *   - ctx->scaler_coeff_reg[luma/chroma][horizontal/vertical][0..3]
 *   - ctx->h_scaler_ctrl and ctx->v_scaler_ctrl
 *
 * The pitch in each direction is render_rect size divided by the scaled
 * size (width_s / height_s) of the current render target.
 */
void tng_calculate_scaler_coff_reg(object_context_p obj_context)
{
    context_DEC_p ctx = (context_DEC_p) obj_context->format_data;
    object_surface_p target = obj_context->current_render_target;
    IMG_UINT8 coeffs[4][16];
    IMG_UINT32 packed;
    float fHorzPitch;
    float fVertPitch;
    int scale_acc = 11;     /* fractional bits used for pitch / initial position */
    int fixed_one;
    int idx;

#ifndef PSBVIDEO_MFLD
    scale_acc = 12;
#endif
#ifdef PSBVIDEO_MRFL_DEC
    scale_acc = 12;
#endif

    drv_debug_msg(VIDEO_DEBUG_GENERAL, "content crop is %dx%d",
        obj_context->driver_data->render_rect.width, obj_context->driver_data->render_rect.height);
    drv_debug_msg(VIDEO_DEBUG_GENERAL, "scaling dest is %dx%d",
        target->width_s, target->height_s);

    /* The unscaled dimensions used here MUST match the Display Width and Height sent to the hardware */
    fHorzPitch = obj_context->driver_data->render_rect.width / (float) target->width_s;
    fVertPitch = obj_context->driver_data->render_rect.height / (float) target->height_s;

    /* Horizontal: the same 4-tap table feeds both the luma and chroma registers. */
    tng_calculate_scaler_coeff(fHorzPitch, coeffs, 16, 4);
    for (idx = 0; idx < 4; idx++)
    {
        /* Only the odd interpolation points (1, 3, 5, 7) are packed. */
        const unsigned int col = 2 * idx + 1;

        packed = 0;
        REGIO_WRITE_FIELD(packed, MSVDX_CMDS, HORIZONTAL_LUMA_COEFFICIENTS, HOR_LUMA_COEFF_3, coeffs[0][col]);
        REGIO_WRITE_FIELD(packed, MSVDX_CMDS, HORIZONTAL_LUMA_COEFFICIENTS, HOR_LUMA_COEFF_2, coeffs[1][col]);
        REGIO_WRITE_FIELD(packed, MSVDX_CMDS, HORIZONTAL_LUMA_COEFFICIENTS, HOR_LUMA_COEFF_1, coeffs[2][col]);
        REGIO_WRITE_FIELD(packed, MSVDX_CMDS, HORIZONTAL_LUMA_COEFFICIENTS, HOR_LUMA_COEFF_0, coeffs[3][col]);
        ctx->scaler_coeff_reg[/* Luma */ 0][/* Hori */ 0][idx] = packed;

        packed = 0;
        REGIO_WRITE_FIELD(packed, MSVDX_CMDS, HORIZONTAL_CHROMA_COEFFICIENTS, HOR_CHROMA_COEFF_3, coeffs[0][col]);
        REGIO_WRITE_FIELD(packed, MSVDX_CMDS, HORIZONTAL_CHROMA_COEFFICIENTS, HOR_CHROMA_COEFF_2, coeffs[1][col]);
        REGIO_WRITE_FIELD(packed, MSVDX_CMDS, HORIZONTAL_CHROMA_COEFFICIENTS, HOR_CHROMA_COEFF_1, coeffs[2][col]);
        REGIO_WRITE_FIELD(packed, MSVDX_CMDS, HORIZONTAL_CHROMA_COEFFICIENTS, HOR_CHROMA_COEFF_0, coeffs[3][col]);
        ctx->scaler_coeff_reg[/* Chroma */ 1][/* Hori */ 0][idx] = packed;
    }

    /* Vertical: same packing, driven by the vertical pitch. */
    tng_calculate_scaler_coeff(fVertPitch, coeffs, 16, 4);
    for (idx = 0; idx < 4; idx++)
    {
        const unsigned int col = 2 * idx + 1;

        packed = 0;
        REGIO_WRITE_FIELD(packed, MSVDX_CMDS, VERTICAL_LUMA_COEFFICIENTS, VER_LUMA_COEFF_3, coeffs[0][col]);
        REGIO_WRITE_FIELD(packed, MSVDX_CMDS, VERTICAL_LUMA_COEFFICIENTS, VER_LUMA_COEFF_2, coeffs[1][col]);
        REGIO_WRITE_FIELD(packed, MSVDX_CMDS, VERTICAL_LUMA_COEFFICIENTS, VER_LUMA_COEFF_1, coeffs[2][col]);
        REGIO_WRITE_FIELD(packed, MSVDX_CMDS, VERTICAL_LUMA_COEFFICIENTS, VER_LUMA_COEFF_0, coeffs[3][col]);
        ctx->scaler_coeff_reg[/* Luma */ 0][/* Verti */ 1][idx] = packed;

        packed = 0;
        REGIO_WRITE_FIELD(packed, MSVDX_CMDS, VERTICAL_CHROMA_COEFFICIENTS, VER_CHROMA_COEFF_3, coeffs[0][col]);
        REGIO_WRITE_FIELD(packed, MSVDX_CMDS, VERTICAL_CHROMA_COEFFICIENTS, VER_CHROMA_COEFF_2, coeffs[1][col]);
        REGIO_WRITE_FIELD(packed, MSVDX_CMDS, VERTICAL_CHROMA_COEFFICIENTS, VER_CHROMA_COEFF_1, coeffs[2][col]);
        REGIO_WRITE_FIELD(packed, MSVDX_CMDS, VERTICAL_CHROMA_COEFFICIENTS, VER_CHROMA_COEFF_0, coeffs[3][col]);
        ctx->scaler_coeff_reg[/* Chroma */ 1][/* Verti */ 1][idx] = packed;
    }

    /* VXD can only downscale from the original display size. */
    IMG_ASSERT(fHorzPitch >= 1 && fVertPitch >= 1);

    fixed_one = 1 << scale_acc;

    /* NOTE(review): the horizontal values are truncated while the vertical */
    /* values below are rounded (+0.5); confirm the asymmetry is intended.  */
    ctx->h_scaler_ctrl = 0;
    REGIO_WRITE_FIELD_LITE(ctx->h_scaler_ctrl, MSVDX_CMDS, HORIZONTAL_SCALE_CONTROL, HORIZONTAL_SCALE_PITCH, (int)(fHorzPitch * fixed_one));
    REGIO_WRITE_FIELD_LITE(ctx->h_scaler_ctrl, MSVDX_CMDS, HORIZONTAL_SCALE_CONTROL, HORIZONTAL_INITIAL_POS, (int)(fHorzPitch * 0.5f * fixed_one));

    ctx->v_scaler_ctrl = 0;
    REGIO_WRITE_FIELD_LITE(ctx->v_scaler_ctrl, MSVDX_CMDS, VERTICAL_SCALE_CONTROL, VERTICAL_SCALE_PITCH, (int)(fVertPitch * fixed_one + 0.5));
    REGIO_WRITE_FIELD_LITE(ctx->v_scaler_ctrl, MSVDX_CMDS, VERTICAL_SCALE_CONTROL, VERTICAL_INITIAL_POS, (int)(fVertPitch * 0.5 * fixed_one + 0.5));
}
312 
/*
 * Emit the scaler setup into the context's command buffer as two rendec
 * register bursts:
 *   1. starting at SCALED_DISPLAY_SIZE: display size, horizontal scale
 *      control, vertical scale control;
 *   2. starting at HORIZONTAL_LUMA_COEFFICIENTS: the 16 cached coefficient
 *      words.
 *
 * If obj_context->scaling_update is set, the cached values are recomputed
 * first and the flag is cleared.
 */
void tng_ved_write_scale_reg(object_context_p obj_context)
{
    psb_cmdbuf_p cmdbuf = obj_context->cmdbuf;
    context_DEC_p ctx = (context_DEC_p) obj_context->format_data;
    uint32_t size_reg = 0;
    unsigned int plane, dir, word;

    /* Refresh the cached scaling registers only when they are stale. */
    if (obj_context->scaling_update) {
        tng_calculate_scaler_coff_reg(obj_context);
        obj_context->scaling_update = 0;
    }

    /* Display size (stored as size - 1) followed by the two control words. */
    psb_cmdbuf_rendec_start(cmdbuf, RENDEC_REGISTER_OFFSET(MSVDX_CMDS, SCALED_DISPLAY_SIZE));
    REGIO_WRITE_FIELD_LITE(size_reg, MSVDX_CMDS, SCALED_DISPLAY_SIZE, SCALE_DISPLAY_WIDTH, obj_context->driver_data->render_rect.width - 1);
    REGIO_WRITE_FIELD_LITE(size_reg, MSVDX_CMDS, SCALED_DISPLAY_SIZE, SCALE_DISPLAY_HEIGHT, obj_context->driver_data->render_rect.height - 1);
    psb_cmdbuf_rendec_write(cmdbuf, size_reg);
    psb_cmdbuf_rendec_write(cmdbuf, ctx->h_scaler_ctrl);
    psb_cmdbuf_rendec_write(cmdbuf, ctx->v_scaler_ctrl);
    psb_cmdbuf_rendec_end(cmdbuf);

    /* Coefficients, in order: luma-H, luma-V, chroma-H, chroma-V (4 words each). */
    psb_cmdbuf_rendec_start(cmdbuf, RENDEC_REGISTER_OFFSET(MSVDX_CMDS, HORIZONTAL_LUMA_COEFFICIENTS));
    for (plane = 0; plane < 2; plane++)
    {
        for (dir = 0; dir < 2; dir++)
        {
            for (word = 0; word < 4; word++)
            {
                psb_cmdbuf_rendec_write(cmdbuf, ctx->scaler_coeff_reg[plane][dir][word]);
            }
        }
    }
    psb_cmdbuf_rendec_end(cmdbuf);
}
355