1 /****************************************************************************** 2 * 3 * Copyright (C) 2015 The Android Open Source Project 4 * 5 * Licensed under the Apache License, Version 2.0 (the "License"); 6 * you may not use this file except in compliance with the License. 7 * You may obtain a copy of the License at: 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 * 17 ***************************************************************************** 18 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore 19 */ 20 /*****************************************************************************/ 21 /* */ 22 /* File Name : impeg2_idct.c */ 23 /* */ 24 /* Description : Contains 2d idct and invese quantization functions */ 25 /* */ 26 /* List of Functions : impeg2_idct_recon_dc() */ 27 /* impeg2_idct_recon_dc_mismatch() */ 28 /* impeg2_idct_recon() */ 29 /* */ 30 /* Issues / Problems : None */ 31 /* */ 32 /* Revision History : */ 33 /* */ 34 /* DD MM YYYY Author(s) Changes */ 35 /* 10 09 2005 Hairsh M First Version */ 36 /* */ 37 /*****************************************************************************/ 38 /* 39 IEEE - 1180 results for this IDCT 40 L 256 256 5 5 300 300 384 384 Thresholds 41 H 255 255 5 5 300 300 383 383 42 sign 1 -1 1 -1 1 -1 1 -1 43 Peak Error 1 1 1 1 1 1 1 1 1 44 Peak Mean Square Error 0.0191 0.0188 0.0108 0.0111 0.0176 0.0188 0.0165 0.0177 0.06 45 Overall Mean Square Error 0.01566406 0.01597656 0.0091875 0.00908906 0.01499063 0.01533281 0.01432344 0.01412344 0.02 46 Peak Mean Error 0.0027 0.0026 0.0028 0.002 0.0017 0.0033 0.0031 0.0025 0.015 47 Overall Mean Error 0.00002656 -0.00031406 0.00016875 0.00005469 -0.00003125 0.00011406 0.00009219 0.00004219 0.0015 48 */ 49 #include <stdio.h> 50 #include <string.h> 51 52 #include "iv_datatypedef.h" 53 #include "iv.h" 54 #include "impeg2_defs.h" 55 #include "impeg2_platform_macros.h" 56 57 #include "impeg2_macros.h" 58 #include "impeg2_globals.h" 59 #include "impeg2_idct.h" 60 61 62 void impeg2_idct_recon_dc(WORD16 *pi2_src, 63 WORD16 *pi2_tmp, 64 UWORD8 *pu1_pred, 65 UWORD8 *pu1_dst, 66 WORD32 i4_src_strd, 67 WORD32 i4_pred_strd, 68 WORD32 i4_dst_strd, 69 WORD32 i4_zero_cols, 70 WORD32 i4_zero_rows) 71 { 72 WORD32 i4_val, i, j; 73 74 UNUSED(pi2_tmp); 75 UNUSED(i4_src_strd); 76 UNUSED(i4_zero_cols); 77 UNUSED(i4_zero_rows); 78 79 i4_val = pi2_src[0] * gai2_impeg2_idct_q15[0]; 80 i4_val = ((i4_val + IDCT_STG1_ROUND) >> IDCT_STG1_SHIFT); 81 i4_val = i4_val * gai2_impeg2_idct_q11[0]; 82 i4_val = ((i4_val + IDCT_STG2_ROUND) >> IDCT_STG2_SHIFT); 83 84 for(i = 0; i < TRANS_SIZE_8; i++) 85 { 86 for(j = 0; j < TRANS_SIZE_8; j++) 87 { 88 pu1_dst[j] = CLIP_U8(i4_val + pu1_pred[j]); 89 } 90 pu1_dst += i4_dst_strd; 91 pu1_pred += i4_pred_strd; 92 } 93 } 94 void impeg2_idct_recon_dc_mismatch(WORD16 *pi2_src, 95 WORD16 *pi2_tmp, 96 UWORD8 *pu1_pred, 97 UWORD8 *pu1_dst, 98 WORD32 i4_src_strd, 99 WORD32 i4_pred_strd, 100 WORD32 i4_dst_strd, 101 WORD32 i4_zero_cols, 102 WORD32 i4_zero_rows) 103 104 { 105 WORD32 i4_val, i, j; 106 WORD32 i4_count = 0; 107 WORD32 i4_sum; 108 109 UNUSED(pi2_tmp); 110 UNUSED(i4_src_strd); 111 UNUSED(i4_zero_cols); 112 UNUSED(i4_zero_rows); 113 114 i4_val = pi2_src[0] * gai2_impeg2_idct_q15[0]; 115 i4_val = ((i4_val + IDCT_STG1_ROUND) >> IDCT_STG1_SHIFT); 116 117 i4_val *= gai2_impeg2_idct_q11[0]; 118 for(i = 0; i < TRANS_SIZE_8; i++) 119 { 120 for (j = 0; j < TRANS_SIZE_8; j++) 121 { 122 i4_sum = i4_val; 123 i4_sum += gai2_impeg2_mismatch_stg2_additive[i4_count]; 124 i4_sum = ((i4_sum + IDCT_STG2_ROUND) >> IDCT_STG2_SHIFT); 125 i4_sum += pu1_pred[j]; 126 pu1_dst[j] = CLIP_U8(i4_sum); 127 i4_count++; 128 } 129 130 pu1_dst += i4_dst_strd; 131 pu1_pred += i4_pred_strd; 132 } 133 134 } 135 /** 136 ******************************************************************************* 137 * 138 * @brief 139 * This function performs Inverse transform and reconstruction for 8x8 140 * input block 141 * 142 * @par Description: 143 * Performs inverse transform and adds the prediction data and clips output 144 * to 8 bit 145 * 146 * @param[in] pi2_src 147 * Input 8x8 coefficients 148 * 149 * @param[in] pi2_tmp 150 * Temporary 8x8 buffer for storing inverse 151 * 152 * transform 153 * 1st stage output 154 * 155 * @param[in] pu1_pred 156 * Prediction 8x8 block 157 * 158 * @param[out] pu1_dst 159 * Output 8x8 block 160 * 161 * @param[in] src_strd 162 * Input stride 163 * 164 * @param[in] pred_strd 165 * Prediction stride 166 * 167 * @param[in] dst_strd 168 * Output Stride 169 * 170 * @param[in] shift 171 * Output shift 172 * 173 * @param[in] zero_cols 174 * Zero columns in pi2_src 175 * 176 * @returns Void 177 * 178 * @remarks 179 * None 180 * 181 ******************************************************************************* 182 */ 183 184 void impeg2_idct_recon(WORD16 *pi2_src, 185 WORD16 *pi2_tmp, 186 UWORD8 *pu1_pred, 187 UWORD8 *pu1_dst, 188 WORD32 i4_src_strd, 189 WORD32 i4_pred_strd, 190 WORD32 i4_dst_strd, 191 WORD32 i4_zero_cols, 192 WORD32 i4_zero_rows) 193 { 194 WORD32 j, k; 195 WORD32 ai4_e[4], ai4_o[4]; 196 WORD32 ai4_ee[2], ai4_eo[2]; 197 WORD32 i4_add; 198 WORD32 i4_shift; 199 WORD16 *pi2_tmp_orig; 200 WORD32 i4_trans_size; 201 WORD32 i4_zero_rows_2nd_stage = i4_zero_cols; 202 WORD32 i4_row_limit_2nd_stage; 203 204 i4_trans_size = TRANS_SIZE_8; 205 206 pi2_tmp_orig = pi2_tmp; 207 208 if((i4_zero_cols & 0xF0) == 0xF0) 209 i4_row_limit_2nd_stage = 4; 210 else 211 i4_row_limit_2nd_stage = TRANS_SIZE_8; 212 213 214 if((i4_zero_rows & 0xF0) == 0xF0) /* First 4 rows of input are non-zero */ 215 { 216 /************************************************************************************************/ 217 /**********************************START - IT_RECON_8x8******************************************/ 218 /************************************************************************************************/ 219 220 /* Inverse Transform 1st stage */ 221 i4_shift = IDCT_STG1_SHIFT; 222 i4_add = 1 << (i4_shift - 1); 223 224 for(j = 0; j < i4_row_limit_2nd_stage; j++) 225 { 226 /* Checking for Zero Cols */ 227 if((i4_zero_cols & 1) == 1) 228 { 229 memset(pi2_tmp, 0, i4_trans_size * sizeof(WORD16)); 230 } 231 else 232 { 233 /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ 234 for(k = 0; k < 4; k++) 235 { 236 ai4_o[k] = gai2_impeg2_idct_q15[1 * 8 + k] * pi2_src[i4_src_strd] 237 + gai2_impeg2_idct_q15[3 * 8 + k] 238 * pi2_src[3 * i4_src_strd]; 239 } 240 ai4_eo[0] = gai2_impeg2_idct_q15[2 * 8 + 0] * pi2_src[2 * i4_src_strd]; 241 ai4_eo[1] = gai2_impeg2_idct_q15[2 * 8 + 1] * pi2_src[2 * i4_src_strd]; 242 ai4_ee[0] = gai2_impeg2_idct_q15[0 * 8 + 0] * pi2_src[0]; 243 ai4_ee[1] = gai2_impeg2_idct_q15[0 * 8 + 1] * pi2_src[0]; 244 245 /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ 246 ai4_e[0] = ai4_ee[0] + ai4_eo[0]; 247 ai4_e[3] = ai4_ee[0] - ai4_eo[0]; 248 ai4_e[1] = ai4_ee[1] + ai4_eo[1]; 249 ai4_e[2] = ai4_ee[1] - ai4_eo[1]; 250 for(k = 0; k < 4; k++) 251 { 252 pi2_tmp[k] = 253 CLIP_S16(((ai4_e[k] + ai4_o[k] + i4_add) >> i4_shift)); 254 pi2_tmp[k + 4] = 255 CLIP_S16(((ai4_e[3 - k] - ai4_o[3 - k] + i4_add) >> i4_shift)); 256 } 257 } 258 pi2_src++; 259 pi2_tmp += i4_trans_size; 260 i4_zero_cols = i4_zero_cols >> 1; 261 } 262 263 pi2_tmp = pi2_tmp_orig; 264 265 /* Inverse Transform 2nd stage */ 266 i4_shift = IDCT_STG2_SHIFT; 267 i4_add = 1 << (i4_shift - 1); 268 if((i4_zero_rows_2nd_stage & 0xF0) == 0xF0) /* First 4 rows of output of 1st stage are non-zero */ 269 { 270 for(j = 0; j < i4_trans_size; j++) 271 { 272 /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ 273 for(k = 0; k < 4; k++) 274 { 275 ai4_o[k] = gai2_impeg2_idct_q11[1 * 8 + k] * pi2_tmp[i4_trans_size] 276 + gai2_impeg2_idct_q11[3 * 8 + k] * pi2_tmp[3 * i4_trans_size]; 277 } 278 ai4_eo[0] = gai2_impeg2_idct_q11[2 * 8 + 0] * pi2_tmp[2 * i4_trans_size]; 279 ai4_eo[1] = gai2_impeg2_idct_q11[2 * 8 + 1] * pi2_tmp[2 * i4_trans_size]; 280 ai4_ee[0] = gai2_impeg2_idct_q11[0 * 8 + 0] * pi2_tmp[0]; 281 ai4_ee[1] = gai2_impeg2_idct_q11[0 * 8 + 1] * pi2_tmp[0]; 282 283 /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ 284 ai4_e[0] = ai4_ee[0] + ai4_eo[0]; 285 ai4_e[3] = ai4_ee[0] - ai4_eo[0]; 286 ai4_e[1] = ai4_ee[1] + ai4_eo[1]; 287 ai4_e[2] = ai4_ee[1] - ai4_eo[1]; 288 for(k = 0; k < 4; k++) 289 { 290 WORD32 itrans_out; 291 itrans_out = 292 CLIP_S16(((ai4_e[k] + ai4_o[k] + i4_add) >> i4_shift)); 293 pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k])); 294 itrans_out = 295 CLIP_S16(((ai4_e[3 - k] - ai4_o[3 - k] + i4_add) >> i4_shift)); 296 pu1_dst[k + 4] = CLIP_U8((itrans_out + pu1_pred[k + 4])); 297 } 298 pi2_tmp++; 299 pu1_pred += i4_pred_strd; 300 pu1_dst += i4_dst_strd; 301 } 302 } 303 else /* All rows of output of 1st stage are non-zero */ 304 { 305 for(j = 0; j < i4_trans_size; j++) 306 { 307 /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ 308 for(k = 0; k < 4; k++) 309 { 310 ai4_o[k] = gai2_impeg2_idct_q11[1 * 8 + k] * pi2_tmp[i4_trans_size] 311 + gai2_impeg2_idct_q11[3 * 8 + k] 312 * pi2_tmp[3 * i4_trans_size] 313 + gai2_impeg2_idct_q11[5 * 8 + k] 314 * pi2_tmp[5 * i4_trans_size] 315 + gai2_impeg2_idct_q11[7 * 8 + k] 316 * pi2_tmp[7 * i4_trans_size]; 317 } 318 319 ai4_eo[0] = gai2_impeg2_idct_q11[2 * 8 + 0] * pi2_tmp[2 * i4_trans_size] 320 + gai2_impeg2_idct_q11[6 * 8 + 0] * pi2_tmp[6 * i4_trans_size]; 321 ai4_eo[1] = gai2_impeg2_idct_q11[2 * 8 + 1] * pi2_tmp[2 * i4_trans_size] 322 + gai2_impeg2_idct_q11[6 * 8 + 1] * pi2_tmp[6 * i4_trans_size]; 323 ai4_ee[0] = gai2_impeg2_idct_q11[0 * 8 + 0] * pi2_tmp[0] 324 + gai2_impeg2_idct_q11[4 * 8 + 0] * pi2_tmp[4 * i4_trans_size]; 325 ai4_ee[1] = gai2_impeg2_idct_q11[0 * 8 + 1] * pi2_tmp[0] 326 + gai2_impeg2_idct_q11[4 * 8 + 1] * pi2_tmp[4 * i4_trans_size]; 327 328 /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ 329 ai4_e[0] = ai4_ee[0] + ai4_eo[0]; 330 ai4_e[3] = ai4_ee[0] - ai4_eo[0]; 331 ai4_e[1] = ai4_ee[1] + ai4_eo[1]; 332 ai4_e[2] = ai4_ee[1] - ai4_eo[1]; 333 for(k = 0; k < 4; k++) 334 { 335 WORD32 itrans_out; 336 itrans_out = 337 CLIP_S16(((ai4_e[k] + ai4_o[k] + i4_add) >> i4_shift)); 338 pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k])); 339 itrans_out = 340 CLIP_S16(((ai4_e[3 - k] - ai4_o[3 - k] + i4_add) >> i4_shift)); 341 pu1_dst[k + 4] = CLIP_U8((itrans_out + pu1_pred[k + 4])); 342 } 343 pi2_tmp++; 344 pu1_pred += i4_pred_strd; 345 pu1_dst += i4_dst_strd; 346 } 347 } 348 /************************************************************************************************/ 349 /************************************END - IT_RECON_8x8******************************************/ 350 /************************************************************************************************/ 351 } 352 else /* All rows of input are non-zero */ 353 { 354 /************************************************************************************************/ 355 /**********************************START - IT_RECON_8x8******************************************/ 356 /************************************************************************************************/ 357 358 /* Inverse Transform 1st stage */ 359 i4_shift = IDCT_STG1_SHIFT; 360 i4_add = 1 << (i4_shift - 1); 361 362 for(j = 0; j < i4_row_limit_2nd_stage; j++) 363 { 364 /* Checking for Zero Cols */ 365 if((i4_zero_cols & 1) == 1) 366 { 367 memset(pi2_tmp, 0, i4_trans_size * sizeof(WORD16)); 368 } 369 else 370 { 371 /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ 372 for(k = 0; k < 4; k++) 373 { 374 ai4_o[k] = gai2_impeg2_idct_q15[1 * 8 + k] * pi2_src[i4_src_strd] 375 + gai2_impeg2_idct_q15[3 * 8 + k] 376 * pi2_src[3 * i4_src_strd] 377 + gai2_impeg2_idct_q15[5 * 8 + k] 378 * pi2_src[5 * i4_src_strd] 379 + gai2_impeg2_idct_q15[7 * 8 + k] 380 * pi2_src[7 * i4_src_strd]; 381 } 382 383 ai4_eo[0] = gai2_impeg2_idct_q15[2 * 8 + 0] * pi2_src[2 * i4_src_strd] 384 + gai2_impeg2_idct_q15[6 * 8 + 0] * pi2_src[6 * i4_src_strd]; 385 ai4_eo[1] = gai2_impeg2_idct_q15[2 * 8 + 1] * pi2_src[2 * i4_src_strd] 386 + gai2_impeg2_idct_q15[6 * 8 + 1] * pi2_src[6 * i4_src_strd]; 387 ai4_ee[0] = gai2_impeg2_idct_q15[0 * 8 + 0] * pi2_src[0] 388 + gai2_impeg2_idct_q15[4 * 8 + 0] * pi2_src[4 * i4_src_strd]; 389 ai4_ee[1] = gai2_impeg2_idct_q15[0 * 8 + 1] * pi2_src[0] 390 + gai2_impeg2_idct_q15[4 * 8 + 1] * pi2_src[4 * i4_src_strd]; 391 392 /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ 393 ai4_e[0] = ai4_ee[0] + ai4_eo[0]; 394 ai4_e[3] = ai4_ee[0] - ai4_eo[0]; 395 ai4_e[1] = ai4_ee[1] + ai4_eo[1]; 396 ai4_e[2] = ai4_ee[1] - ai4_eo[1]; 397 for(k = 0; k < 4; k++) 398 { 399 pi2_tmp[k] = 400 CLIP_S16(((ai4_e[k] + ai4_o[k] + i4_add) >> i4_shift)); 401 pi2_tmp[k + 4] = 402 CLIP_S16(((ai4_e[3 - k] - ai4_o[3 - k] + i4_add) >> i4_shift)); 403 } 404 } 405 pi2_src++; 406 pi2_tmp += i4_trans_size; 407 i4_zero_cols = i4_zero_cols >> 1; 408 } 409 410 pi2_tmp = pi2_tmp_orig; 411 412 /* Inverse Transform 2nd stage */ 413 i4_shift = IDCT_STG2_SHIFT; 414 i4_add = 1 << (i4_shift - 1); 415 if((i4_zero_rows_2nd_stage & 0xF0) == 0xF0) /* First 4 rows of output of 1st stage are non-zero */ 416 { 417 for(j = 0; j < i4_trans_size; j++) 418 { 419 /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ 420 for(k = 0; k < 4; k++) 421 { 422 ai4_o[k] = gai2_impeg2_idct_q11[1 * 8 + k] * pi2_tmp[i4_trans_size] 423 + gai2_impeg2_idct_q11[3 * 8 + k] * pi2_tmp[3 * i4_trans_size]; 424 } 425 ai4_eo[0] = gai2_impeg2_idct_q11[2 * 8 + 0] * pi2_tmp[2 * i4_trans_size]; 426 ai4_eo[1] = gai2_impeg2_idct_q11[2 * 8 + 1] * pi2_tmp[2 * i4_trans_size]; 427 ai4_ee[0] = gai2_impeg2_idct_q11[0 * 8 + 0] * pi2_tmp[0]; 428 ai4_ee[1] = gai2_impeg2_idct_q11[0 * 8 + 1] * pi2_tmp[0]; 429 430 /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ 431 ai4_e[0] = ai4_ee[0] + ai4_eo[0]; 432 ai4_e[3] = ai4_ee[0] - ai4_eo[0]; 433 ai4_e[1] = ai4_ee[1] + ai4_eo[1]; 434 ai4_e[2] = ai4_ee[1] - ai4_eo[1]; 435 for(k = 0; k < 4; k++) 436 { 437 WORD32 itrans_out; 438 itrans_out = 439 CLIP_S16(((ai4_e[k] + ai4_o[k] + i4_add) >> i4_shift)); 440 pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k])); 441 itrans_out = 442 CLIP_S16(((ai4_e[3 - k] - ai4_o[3 - k] + i4_add) >> i4_shift)); 443 pu1_dst[k + 4] = CLIP_U8((itrans_out + pu1_pred[k + 4])); 444 } 445 pi2_tmp++; 446 pu1_pred += i4_pred_strd; 447 pu1_dst += i4_dst_strd; 448 } 449 } 450 else /* All rows of output of 1st stage are non-zero */ 451 { 452 for(j = 0; j < i4_trans_size; j++) 453 { 454 /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ 455 for(k = 0; k < 4; k++) 456 { 457 ai4_o[k] = gai2_impeg2_idct_q11[1 * 8 + k] * pi2_tmp[i4_trans_size] 458 + gai2_impeg2_idct_q11[3 * 8 + k] 459 * pi2_tmp[3 * i4_trans_size] 460 + gai2_impeg2_idct_q11[5 * 8 + k] 461 * pi2_tmp[5 * i4_trans_size] 462 + gai2_impeg2_idct_q11[7 * 8 + k] 463 * pi2_tmp[7 * i4_trans_size]; 464 } 465 466 ai4_eo[0] = gai2_impeg2_idct_q11[2 * 8 + 0] * pi2_tmp[2 * i4_trans_size] 467 + gai2_impeg2_idct_q11[6 * 8 + 0] * pi2_tmp[6 * i4_trans_size]; 468 ai4_eo[1] = gai2_impeg2_idct_q11[2 * 8 + 1] * pi2_tmp[2 * i4_trans_size] 469 + gai2_impeg2_idct_q11[6 * 8 + 1] * pi2_tmp[6 * i4_trans_size]; 470 ai4_ee[0] = gai2_impeg2_idct_q11[0 * 8 + 0] * pi2_tmp[0] 471 + gai2_impeg2_idct_q11[4 * 8 + 0] * pi2_tmp[4 * i4_trans_size]; 472 ai4_ee[1] = gai2_impeg2_idct_q11[0 * 8 + 1] * pi2_tmp[0] 473 + gai2_impeg2_idct_q11[4 * 8 + 1] * pi2_tmp[4 * i4_trans_size]; 474 475 /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ 476 ai4_e[0] = ai4_ee[0] + ai4_eo[0]; 477 ai4_e[3] = ai4_ee[0] - ai4_eo[0]; 478 ai4_e[1] = ai4_ee[1] + ai4_eo[1]; 479 ai4_e[2] = ai4_ee[1] - ai4_eo[1]; 480 for(k = 0; k < 4; k++) 481 { 482 WORD32 itrans_out; 483 itrans_out = 484 CLIP_S16(((ai4_e[k] + ai4_o[k] + i4_add) >> i4_shift)); 485 pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k])); 486 itrans_out = 487 CLIP_S16(((ai4_e[3 - k] - ai4_o[3 - k] + i4_add) >> i4_shift)); 488 pu1_dst[k + 4] = CLIP_U8((itrans_out + pu1_pred[k + 4])); 489 } 490 pi2_tmp++; 491 pu1_pred += i4_pred_strd; 492 pu1_dst += i4_dst_strd; 493 } 494 } 495 /************************************************************************************************/ 496 /************************************END - IT_RECON_8x8******************************************/ 497 /************************************************************************************************/ 498 } 499 } 500 501