1 /*
2  *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 
12 #include "vpx_config.h"
13 #include "vp8_rtcd.h"
14 #if !defined(WIN32) && CONFIG_OS_SUPPORT == 1
15 # include <unistd.h>
16 #endif
17 #include "onyxd_int.h"
18 #include "vpx_mem/vpx_mem.h"
19 #include "vp8/common/threading.h"
20 
21 #include "vp8/common/loopfilter.h"
22 #include "vp8/common/extend.h"
23 #include "vpx_ports/vpx_timer.h"
24 #include "detokenize.h"
25 #include "vp8/common/reconintra4x4.h"
26 #include "vp8/common/reconinter.h"
27 #include "vp8/common/reconintra.h"
28 #include "vp8/common/setupintrarecon.h"
29 #if CONFIG_ERROR_CONCEALMENT
30 #include "error_concealment.h"
31 #endif
32 
/* Allocate a zero-filled array of n elements for pointer p with vpx_calloc();
 * the assignment and failure check are delegated to CHECK_MEM_ERROR. */
#define CALLOC_ARRAY(p, n) \
  CHECK_MEM_ERROR((p), vpx_calloc(sizeof(*(p)), (n)))

/* Same purpose as CALLOC_ARRAY, but the storage is obtained from
 * vpx_memalign() with alignment algn and then cleared with memset().
 * Note: p and n are expanded more than once, so pass side-effect-free
 * expressions. */
#define CALLOC_ARRAY_ALIGNED(p, n, algn)                             \
  do {                                                               \
    CHECK_MEM_ERROR((p), vpx_memalign((algn), sizeof(*(p)) * (n)));  \
    memset((p), 0, (n) * sizeof(*(p)));                              \
  } while (0)
38 
39 
40 void vp8_mb_init_dequantizer(VP8D_COMP *pbi, MACROBLOCKD *xd);
41 
/*
 * Fill in the MACROBLOCKD embedded in each of the first `count` entries of
 * `mbrd`, using the descriptor `xd` and the common decoder state as the
 * source, and reset the per-row column progress counters.
 *
 * pbi   - decoder instance (common state, bool coders, mt_current_mb_col).
 * xd    - descriptor whose settings are copied into every entry.
 * mbrd  - array of row-decoder contexts to initialise.
 * count - number of entries of mbrd to initialise.
 */
static void setup_decoding_thread_data(VP8D_COMP *pbi, MACROBLOCKD *xd,
                                       MB_ROW_DEC *mbrd, int count)
{
    VP8_COMMON *const cm = &pbi->common;
    int t;
    int row;

    for (t = 0; t < count; ++t) {
        MACROBLOCKD *const thr = &mbrd[t].mbd;

        /* Sub-pixel prediction function pointers. */
        thr->subpixel_predict      = xd->subpixel_predict;
        thr->subpixel_predict8x4   = xd->subpixel_predict8x4;
        thr->subpixel_predict8x8   = xd->subpixel_predict8x8;
        thr->subpixel_predict16x16 = xd->subpixel_predict16x16;

        /* Entry t starts (t + 1) mode-info rows into the frame. */
        thr->mode_info_stride  = cm->mode_info_stride;
        thr->mode_info_context = cm->mi + cm->mode_info_stride * (t + 1);

        thr->frame_type = cm->frame_type;
        thr->pre = xd->pre;
        thr->dst = xd->dst;

        /* Segmentation state. */
        thr->segmentation_enabled  = xd->segmentation_enabled;
        thr->mb_segement_abs_delta = xd->mb_segement_abs_delta;
        memcpy(thr->segment_feature_data, xd->segment_feature_data,
               sizeof(xd->segment_feature_data));

        /* Loop-filter delta tables and their control flags. */
        memcpy(thr->ref_lf_deltas, xd->ref_lf_deltas,
               sizeof(xd->ref_lf_deltas));
        memcpy(thr->mode_lf_deltas, xd->mode_lf_deltas,
               sizeof(xd->mode_lf_deltas));
        thr->mode_ref_lf_delta_enabled = xd->mode_ref_lf_delta_enabled;
        thr->mode_ref_lf_delta_update  = xd->mode_ref_lf_delta_update;

        /* Start out pointing at the first bool coder. */
        thr->current_bc = &pbi->mbc[0];

        /* Dequantisation tables. */
        memcpy(thr->dequant_y1_dc, xd->dequant_y1_dc,
               sizeof(xd->dequant_y1_dc));
        memcpy(thr->dequant_y1, xd->dequant_y1, sizeof(xd->dequant_y1));
        memcpy(thr->dequant_y2, xd->dequant_y2, sizeof(xd->dequant_y2));
        memcpy(thr->dequant_uv, xd->dequant_uv, sizeof(xd->dequant_uv));

        /* The low three bits are masked off when the frame is full-pixel. */
        thr->fullpixel_mask = cm->full_pixel ? 0xfffffff8 : 0xffffffff;
    }

    /* No column of any row has been decoded yet. */
    for (row = 0; row < cm->mb_rows; ++row)
        pbi->mt_current_mb_col[row] = -1;
}
92 
/*
 * Decode one macroblock: read its tokens (unless it is marked as skipped),
 * build the intra or inter prediction straight into xd->dst, and then add
 * the dequantised, inverse-transformed residual.
 *
 * pbi    - decoder instance.
 * xd     - macroblock descriptor; mode_info_context, current_bc, dst and the
 *          recon_above / recon_left pointers must already be set up.
 * mb_idx - macroblock index; only consulted when CONFIG_ERROR_CONCEALMENT is
 *          enabled, where it is compared against pbi->mvs_corrupt_from_mb.
 */
static void mt_decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd,
                                 unsigned int mb_idx)
{
    MB_PREDICTION_MODE mode;
    int blk;
#if CONFIG_ERROR_CONCEALMENT
    int corruption_detected = 0;
#else
    (void)mb_idx;
#endif

    if (xd->mode_info_context->mbmi.mb_skip_coeff) {
        vp8_reset_mb_tokens_context(xd);
    } else if (!vp8dx_bool_error(xd->current_bc)) {
        const int eobtotal = vp8_decode_mb_tokens(pbi, xd);

        /* Special case: force the loop filter to skip when eobtotal is 0. */
        xd->mode_info_context->mbmi.mb_skip_coeff = (eobtotal == 0);
    }

    /* The mode is sampled here, before any concealment below runs. */
    mode = xd->mode_info_context->mbmi.mode;

    if (xd->segmentation_enabled)
        vp8_mb_init_dequantizer(pbi, xd);

#if CONFIG_ERROR_CONCEALMENT
    if (pbi->ec_active) {
        /* With independent partitions the residual can still be applied
         * even though other partitions of the frame are corrupt.
         */
        int throw_residual = (!pbi->independent_partitions &&
                              pbi->frame_corrupt_residual);
        throw_residual = (throw_residual || vp8dx_bool_error(xd->current_bc));

        if ((mb_idx >= pbi->mvs_corrupt_from_mb || throw_residual)) {
            /* Corrupt residual or corrupt mode/motion vectors: use the
             * predictor alone as the reconstruction.
             */
            pbi->frame_corrupt_residual = 1;
            memset(xd->qcoeff, 0, sizeof(xd->qcoeff));
            vp8_conceal_corrupt_mb(xd);

            corruption_detected = 1;

            /* Zero the eobs so the B_PRED path below skips its idct and
             * only the prediction is used for reconstruction.
             */
            memset(xd->eobs, 0, 25);
        }
    }
#endif

    /* Prediction. */
    if (xd->mode_info_context->mbmi.ref_frame != INTRA_FRAME) {
        vp8_build_inter_predictors_mb(xd);
    } else {
        vp8_build_intra_predictors_mbuv_s(xd,
                                          xd->recon_above[1],
                                          xd->recon_above[2],
                                          xd->recon_left[1],
                                          xd->recon_left[2],
                                          xd->recon_left_stride[1],
                                          xd->dst.u_buffer, xd->dst.v_buffer,
                                          xd->dst.uv_stride);

        if (mode == B_PRED) {
            short *const dq = xd->dequant_y1;
            const int y_stride = xd->dst.y_stride;

            /* Clear out residual eob info. */
            if (xd->mode_info_context->mbmi.mb_skip_coeff)
                memset(xd->eobs, 0, 25);

            intra_prediction_down_copy(xd, xd->recon_above[0] + 16);

            for (blk = 0; blk < 16; ++blk) {
                BLOCKD *const b = &xd->block[blk];
                unsigned char *const dst = xd->dst.y_buffer + b->offset;
                const B_PREDICTION_MODE b_mode =
                    xd->mode_info_context->bmi[blk].as_mode;
                const int in_top_row = (blk < 4);
                const int in_left_col = ((blk & 3) == 0);
                unsigned char *above;
                unsigned char *left;
                int left_stride;
                unsigned char top_left;

                /* Caution: some b_modes need 8 pixels
                 * (4 above + 4 above-right).
                 */
                above = (in_top_row && pbi->common.filter_level)
                            ? xd->recon_above[0] + b->offset
                            : dst - y_stride;

                if (in_left_col && pbi->common.filter_level) {
                    left = xd->recon_left[0] + blk;
                    left_stride = 1;
                } else {
                    left = dst - 1;
                    left_stride = y_stride;
                }

                /* Blocks 4, 8 and 12 take their top-left pixel from the
                 * recon_left buffer when the loop filter is on.
                 */
                if (in_left_col && !in_top_row && pbi->common.filter_level)
                    top_left = xd->recon_left[0][blk - 1];
                else
                    top_left = above[-1];

                vp8_intra4x4_predict(above, left, left_stride,
                                     b_mode, dst, y_stride, top_left);

                if (xd->eobs[blk] > 1) {
                    vp8_dequant_idct_add(b->qcoeff, dq, dst, y_stride);
                } else if (xd->eobs[blk]) {
                    vp8_dc_only_idct_add(b->qcoeff[0] * dq[0],
                                         dst, y_stride, dst, y_stride);
                    memset(b->qcoeff, 0, 2 * sizeof(b->qcoeff[0]));
                }
            }
        } else {
            vp8_build_intra_predictors_mby_s(xd,
                                             xd->recon_above[0],
                                             xd->recon_left[0],
                                             xd->recon_left_stride[0],
                                             xd->dst.y_buffer,
                                             xd->dst.y_stride);
        }
    }

#if CONFIG_ERROR_CONCEALMENT
    if (corruption_detected)
        return;
#endif

    /* Nothing to add when the macroblock carries no coefficients. */
    if (xd->mode_info_context->mbmi.mb_skip_coeff)
        return;

    /* Dequantisation and idct. */
    if (mode != B_PRED) {
        short *dq = xd->dequant_y1;

        if (mode != SPLITMV) {
            BLOCKD *const y2 = &xd->block[24];

            /* Second-order transform on the dc block. */
            if (xd->eobs[24] > 1) {
                vp8_dequantize_b(y2, xd->dequant_y2);

                vp8_short_inv_walsh4x4(&y2->dqcoeff[0], xd->qcoeff);
                memset(y2->qcoeff, 0, 16 * sizeof(y2->qcoeff[0]));
            } else {
                y2->dqcoeff[0] = y2->qcoeff[0] * xd->dequant_y2[0];
                vp8_short_inv_walsh4x4_1(&y2->dqcoeff[0], xd->qcoeff);
                memset(y2->qcoeff, 0, 2 * sizeof(y2->qcoeff[0]));
            }

            /* Override the dc dequant constant in order to preserve the
             * dc components.
             */
            dq = xd->dequant_y1_dc;
        }

        vp8_dequant_idct_add_y_block(xd->qcoeff, dq,
                                     xd->dst.y_buffer,
                                     xd->dst.y_stride, xd->eobs);
    }

    vp8_dequant_idct_add_uv_block(xd->qcoeff + 16 * 16, xd->dequant_uv,
                                  xd->dst.u_buffer, xd->dst.v_buffer,
                                  xd->dst.uv_stride, xd->eobs + 16);
}
298 
/*
 * Decode macroblock rows start_mb_row, start_mb_row + (threads + 1), ...
 * of the current frame, where threads is pbi->decoding_thread_count.
 *
 * pbi          - decoder instance.
 * xd           - this caller's macroblock descriptor; mode_info_context must
 *                already point at the first macroblock of start_mb_row.
 * start_mb_row - first macroblock row handled by this call.
 *
 * Progress is published per row in pbi->mt_current_mb_col[]; before every
 * nsync-th column (nsync = pbi->sync_range) the loop spins until the row
 * above is more than nsync columns ahead.  When the loop filter is enabled
 * the unfiltered last row / right column of each macroblock is saved to the
 * mt_*above_row / mt_*left_col buffers for later intra prediction, and the
 * macroblock is filtered in place.  If this call decoded the last row of
 * the frame it posts pbi->h_event_end_decoding.
 *
 * NOTE(review): the cross-thread progress counters are plain volatile ints,
 * as in the original code; no atomics or barriers are used here.
 */
static void mt_decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd, int start_mb_row)
{
    volatile const int *last_row_current_mb_col;
    volatile int *current_mb_col;
    int mb_row;
    VP8_COMMON *pc = &pbi->common;
    const int nsync = pbi->sync_range;
    /* Stand-in "row above" progress for row 0: always far enough ahead. */
    const int first_row_no_sync_above = pc->mb_cols + nsync;
    int num_part = 1 << pbi->common.multi_token_partition;
    int last_mb_row = start_mb_row;

    YV12_BUFFER_CONFIG *yv12_fb_new = pbi->dec_fb_ref[INTRA_FRAME];
    YV12_BUFFER_CONFIG *yv12_fb_lst = pbi->dec_fb_ref[LAST_FRAME];

    int recon_y_stride = yv12_fb_new->y_stride;
    int recon_uv_stride = yv12_fb_new->uv_stride;

    unsigned char *ref_buffer[MAX_REF_FRAMES][3];
    unsigned char *dst_buffer[3];
    int i;
    int ref_fb_corrupted[MAX_REF_FRAMES];

    ref_fb_corrupted[INTRA_FRAME] = 0;

    /* Only the inter reference entries (index 1 and up) are filled in;
     * ref_buffer[INTRA_FRAME] is deliberately left unset and must not be
     * read (see the pre-buffer setup in the column loop).
     */
    for (i = 1; i < MAX_REF_FRAMES; i++)
    {
        YV12_BUFFER_CONFIG *this_fb = pbi->dec_fb_ref[i];

        ref_buffer[i][0] = this_fb->y_buffer;
        ref_buffer[i][1] = this_fb->u_buffer;
        ref_buffer[i][2] = this_fb->v_buffer;

        ref_fb_corrupted[i] = this_fb->corrupted;
    }

    dst_buffer[0] = yv12_fb_new->y_buffer;
    dst_buffer[1] = yv12_fb_new->u_buffer;
    dst_buffer[2] = yv12_fb_new->v_buffer;

    xd->up_available = (start_mb_row != 0);

    for (mb_row = start_mb_row; mb_row < pc->mb_rows; mb_row += (pbi->decoding_thread_count + 1))
    {
        int recon_yoffset, recon_uvoffset;
        int mb_col;
        int filter_level;
        loop_filter_info_n *lfi_n = &pc->lf_info;

        /* save last row processed by this thread */
        last_mb_row = mb_row;
        /* select bool coder for current partition */
        xd->current_bc = &pbi->mbc[mb_row % num_part];

        if (mb_row > 0)
            last_row_current_mb_col = &pbi->mt_current_mb_col[mb_row - 1];
        else
            last_row_current_mb_col = &first_row_no_sync_above;

        current_mb_col = &pbi->mt_current_mb_col[mb_row];

        recon_yoffset = mb_row * recon_y_stride * 16;
        recon_uvoffset = mb_row * recon_uv_stride * 8;

        /* reset contexts */
        xd->above_context = pc->above_context;
        memset(xd->left_context, 0, sizeof(ENTROPY_CONTEXT_PLANES));

        xd->left_available = 0;

        /* Shift the non-negative product first and negate afterwards:
         * left-shifting a negative value is undefined behaviour.
         */
        xd->mb_to_top_edge = -((mb_row * 16) << 3);
        xd->mb_to_bottom_edge = ((pc->mb_rows - 1 - mb_row) * 16) << 3;

        if (pbi->common.filter_level)
        {
            /* Predict from the saved unfiltered row / column buffers. */
            xd->recon_above[0] = pbi->mt_yabove_row[mb_row] + 0 * 16 + 32;
            xd->recon_above[1] = pbi->mt_uabove_row[mb_row] + 0 * 8 + 16;
            xd->recon_above[2] = pbi->mt_vabove_row[mb_row] + 0 * 8 + 16;

            xd->recon_left[0] = pbi->mt_yleft_col[mb_row];
            xd->recon_left[1] = pbi->mt_uleft_col[mb_row];
            xd->recon_left[2] = pbi->mt_vleft_col[mb_row];

            /* TODO: move to outside row loop */
            xd->recon_left_stride[0] = 1;
            xd->recon_left_stride[1] = 1;
        }
        else
        {
            /* Predict directly from the destination frame buffer. */
            xd->recon_above[0] = dst_buffer[0] + recon_yoffset;
            xd->recon_above[1] = dst_buffer[1] + recon_uvoffset;
            xd->recon_above[2] = dst_buffer[2] + recon_uvoffset;

            xd->recon_left[0] = xd->recon_above[0] - 1;
            xd->recon_left[1] = xd->recon_above[1] - 1;
            xd->recon_left[2] = xd->recon_above[2] - 1;

            xd->recon_above[0] -= xd->dst.y_stride;
            xd->recon_above[1] -= xd->dst.uv_stride;
            xd->recon_above[2] -= xd->dst.uv_stride;

            /* TODO: move to outside row loop */
            xd->recon_left_stride[0] = xd->dst.y_stride;
            xd->recon_left_stride[1] = xd->dst.uv_stride;

            setup_intra_recon_left(xd->recon_left[0], xd->recon_left[1],
                                   xd->recon_left[2], xd->dst.y_stride,
                                   xd->dst.uv_stride);
        }

        for (mb_col = 0; mb_col < pc->mb_cols; mb_col++)
        {
            /* Publish the last column this row has finished. */
            *current_mb_col = mb_col - 1;

            /* Every nsync columns, wait for the row above to get ahead. */
            if ((mb_col & (nsync - 1)) == 0)
            {
                while (mb_col > (*last_row_current_mb_col - nsync))
                {
                    x86_pause_hint();
                    thread_sleep(0);
                }
            }

            /* Distance of MB to the various image edges.
             * These are specified to 8th pel as they are always
             * compared to values that are in 1/8th pel units.
             */
            xd->mb_to_left_edge = -((mb_col * 16) << 3);
            xd->mb_to_right_edge = ((pc->mb_cols - 1 - mb_col) * 16) << 3;

#if CONFIG_ERROR_CONCEALMENT
            {
                int corrupt_residual =
                            (!pbi->independent_partitions &&
                            pbi->frame_corrupt_residual) ||
                            vp8dx_bool_error(xd->current_bc);
                if (pbi->ec_active &&
                    (xd->mode_info_context->mbmi.ref_frame ==
                                                     INTRA_FRAME) &&
                    corrupt_residual)
                {
                    /* We have an intra block with corrupt
                     * coefficients, better to conceal with an inter
                     * block.
                     * Interpolate MVs from neighboring MBs
                     *
                     * Note that for the first mb with corrupt
                     * residual in a frame, we might not discover
                     * that before decoding the residual. That
                     * happens after this check, and therefore no
                     * inter concealment will be done.
                     */
                    vp8_interpolate_motion(xd,
                                           mb_row, mb_col,
                                           pc->mb_rows, pc->mb_cols,
                                           pc->mode_info_stride);
                }
            }
#endif

            xd->dst.y_buffer = dst_buffer[0] + recon_yoffset;
            xd->dst.u_buffer = dst_buffer[1] + recon_uvoffset;
            xd->dst.v_buffer = dst_buffer[2] + recon_uvoffset;

            if (xd->mode_info_context->mbmi.ref_frame != INTRA_FRAME)
            {
                const int ref = xd->mode_info_context->mbmi.ref_frame;

                xd->pre.y_buffer = ref_buffer[ref][0] + recon_yoffset;
                xd->pre.u_buffer = ref_buffer[ref][1] + recon_uvoffset;
                xd->pre.v_buffer = ref_buffer[ref][2] + recon_uvoffset;
            }
            else
            {
                /* ref_buffer[INTRA_FRAME] was never initialised; an intra
                 * macroblock does not use the pre buffers, so clear them
                 * instead of reading indeterminate pointers.
                 */
                xd->pre.y_buffer = NULL;
                xd->pre.u_buffer = NULL;
                xd->pre.v_buffer = NULL;
            }

            /* propagate errors from reference frames */
            xd->corrupted |= ref_fb_corrupted[xd->mode_info_context->mbmi.ref_frame];

            mt_decode_macroblock(pbi, xd, 0);

            xd->left_available = 1;

            /* check if the boolean decoder has suffered an error */
            xd->corrupted |= vp8dx_bool_error(xd->current_bc);

            xd->recon_above[0] += 16;
            xd->recon_above[1] += 8;
            xd->recon_above[2] += 8;

            if (!pbi->common.filter_level)
            {
                xd->recon_left[0] += 16;
                xd->recon_left[1] += 8;
                xd->recon_left[2] += 8;
            }

            if (pbi->common.filter_level)
            {
                int skip_lf = (xd->mode_info_context->mbmi.mode != B_PRED &&
                               xd->mode_info_context->mbmi.mode != SPLITMV &&
                               xd->mode_info_context->mbmi.mb_skip_coeff);

                const int mode_index = lfi_n->mode_lf_lut[xd->mode_info_context->mbmi.mode];
                const int seg = xd->mode_info_context->mbmi.segment_id;
                const int ref_frame = xd->mode_info_context->mbmi.ref_frame;

                filter_level = lfi_n->lvl[seg][ref_frame][mode_index];

                if (mb_row != pc->mb_rows - 1)
                {
                    /* Save decoded MB last row data for next-row decoding */
                    memcpy((pbi->mt_yabove_row[mb_row + 1] + 32 + mb_col * 16), (xd->dst.y_buffer + 15 * recon_y_stride), 16);
                    memcpy((pbi->mt_uabove_row[mb_row + 1] + 16 + mb_col * 8), (xd->dst.u_buffer + 7 * recon_uv_stride), 8);
                    memcpy((pbi->mt_vabove_row[mb_row + 1] + 16 + mb_col * 8), (xd->dst.v_buffer + 7 * recon_uv_stride), 8);
                }

                /* save left_col for next MB decoding */
                if (mb_col != pc->mb_cols - 1)
                {
                    MODE_INFO *next = xd->mode_info_context + 1;

                    if (next->mbmi.ref_frame == INTRA_FRAME)
                    {
                        for (i = 0; i < 16; i++)
                            pbi->mt_yleft_col[mb_row][i] = xd->dst.y_buffer[i * recon_y_stride + 15];
                        for (i = 0; i < 8; i++)
                        {
                            pbi->mt_uleft_col[mb_row][i] = xd->dst.u_buffer[i * recon_uv_stride + 7];
                            pbi->mt_vleft_col[mb_row][i] = xd->dst.v_buffer[i * recon_uv_stride + 7];
                        }
                    }
                }

                /* loopfilter on this macroblock. */
                if (filter_level)
                {
                    if (pc->filter_type == NORMAL_LOOPFILTER)
                    {
                        loop_filter_info lfi;
                        FRAME_TYPE frame_type = pc->frame_type;
                        const int hev_index = lfi_n->hev_thr_lut[frame_type][filter_level];
                        lfi.mblim = lfi_n->mblim[filter_level];
                        lfi.blim = lfi_n->blim[filter_level];
                        lfi.lim = lfi_n->lim[filter_level];
                        lfi.hev_thr = lfi_n->hev_thr[hev_index];

                        if (mb_col > 0)
                            vp8_loop_filter_mbv
                            (xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi);

                        if (!skip_lf)
                            vp8_loop_filter_bv
                            (xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi);

                        /* don't apply across umv border */
                        if (mb_row > 0)
                            vp8_loop_filter_mbh
                            (xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi);

                        if (!skip_lf)
                            vp8_loop_filter_bh
                            (xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi);
                    }
                    else
                    {
                        if (mb_col > 0)
                            vp8_loop_filter_simple_mbv
                            (xd->dst.y_buffer, recon_y_stride, lfi_n->mblim[filter_level]);

                        if (!skip_lf)
                            vp8_loop_filter_simple_bv
                            (xd->dst.y_buffer, recon_y_stride, lfi_n->blim[filter_level]);

                        /* don't apply across umv border */
                        if (mb_row > 0)
                            vp8_loop_filter_simple_mbh
                            (xd->dst.y_buffer, recon_y_stride, lfi_n->mblim[filter_level]);

                        if (!skip_lf)
                            vp8_loop_filter_simple_bh
                            (xd->dst.y_buffer, recon_y_stride, lfi_n->blim[filter_level]);
                    }
                }

            }

            recon_yoffset += 16;
            recon_uvoffset += 8;

            ++xd->mode_info_context;  /* next mb */

            xd->above_context++;
        }

        /* adjust to the next row of mbs */
        if (pbi->common.filter_level)
        {
            if (mb_row != pc->mb_rows - 1)
            {
                int lasty = yv12_fb_lst->y_width + VP8BORDERINPIXELS;
                int lastuv = (yv12_fb_lst->y_width >> 1) + (VP8BORDERINPIXELS >> 1);

                /* Replicate the last saved pixel 4 times past the row end. */
                for (i = 0; i < 4; i++)
                {
                    pbi->mt_yabove_row[mb_row + 1][lasty + i] = pbi->mt_yabove_row[mb_row + 1][lasty - 1];
                    pbi->mt_uabove_row[mb_row + 1][lastuv + i] = pbi->mt_uabove_row[mb_row + 1][lastuv - 1];
                    pbi->mt_vabove_row[mb_row + 1][lastuv + i] = pbi->mt_vabove_row[mb_row + 1][lastuv - 1];
                }
            }
        }
        else
            vp8_extend_mb_row(yv12_fb_new, xd->dst.y_buffer + 16,
                              xd->dst.u_buffer + 8, xd->dst.v_buffer + 8);

        /* last MB of row is ready just after extension is done */
        *current_mb_col = mb_col + nsync;

        ++xd->mode_info_context;      /* skip prediction column */
        xd->up_available = 1;

        /* since we have multithread */
        xd->mode_info_context += xd->mode_info_stride * pbi->decoding_thread_count;
    }

    /* signal end of frame decoding if this thread processed the last mb_row */
    if (last_mb_row == (pc->mb_rows - 1))
        sem_post(&pbi->h_event_end_decoding);

}
621 
622 
thread_decoding_proc(void * p_data)623 static THREAD_FUNCTION thread_decoding_proc(void *p_data)
624 {
625     int ithread = ((DECODETHREAD_DATA *)p_data)->ithread;
626     VP8D_COMP *pbi = (VP8D_COMP *)(((DECODETHREAD_DATA *)p_data)->ptr1);
627     MB_ROW_DEC *mbrd = (MB_ROW_DEC *)(((DECODETHREAD_DATA *)p_data)->ptr2);
628     ENTROPY_CONTEXT_PLANES mb_row_left_context;
629 
630     while (1)
631     {
632         if (pbi->b_multithreaded_rd == 0)
633             break;
634 
635         if (sem_wait(&pbi->h_event_start_decoding[ithread]) == 0)
636         {
637             if (pbi->b_multithreaded_rd == 0)
638                 break;
639             else
640             {
641                 MACROBLOCKD *xd = &mbrd->mbd;
642                 xd->left_context = &mb_row_left_context;
643 
644                 mt_decode_mb_rows(pbi, xd, ithread+1);
645             }
646         }
647     }
648 
649     return 0 ;
650 }
651 
652 
/*
 * Create the worker threads of the multithreaded decoder.
 *
 * The total thread count is pbi->max_threads, capped at 8 and at
 * pbi->common.processor_core_count.  When that leaves more than one
 * thread, (count - 1) workers are started, each with its own start
 * semaphore, MB_ROW_DEC context and DECODETHREAD_DATA record, and
 * pbi->b_multithreaded_rd is set.  Otherwise multithreading stays off
 * and nothing is allocated.
 *
 * NOTE(review): the return values of sem_init() and pthread_create() are
 * not checked here.
 */
void vp8_decoder_create_threads(VP8D_COMP *pbi)
{
    int cores;
    unsigned int t;

    pbi->b_multithreaded_rd = 0;
    pbi->allocated_decoding_thread_count = 0;

    /* limit decoding threads to the max number of token partitions */
    cores = pbi->max_threads;
    if (cores > 8)
        cores = 8;

    /* limit decoding threads to the available cores */
    if (cores > pbi->common.processor_core_count)
        cores = pbi->common.processor_core_count;

    /* One thread or fewer: nothing to create. */
    if (cores <= 1)
        return;

    pbi->b_multithreaded_rd = 1;
    pbi->decoding_thread_count = cores - 1;

    CALLOC_ARRAY(pbi->h_decoding_thread, pbi->decoding_thread_count);
    CALLOC_ARRAY(pbi->h_event_start_decoding, pbi->decoding_thread_count);
    CALLOC_ARRAY_ALIGNED(pbi->mb_row_di, pbi->decoding_thread_count, 32);
    CALLOC_ARRAY(pbi->de_thread_data, pbi->decoding_thread_count);

    for (t = 0; t < pbi->decoding_thread_count; ++t) {
        DECODETHREAD_DATA *const td = &pbi->de_thread_data[t];

        sem_init(&pbi->h_event_start_decoding[t], 0, 0);

        vp8_setup_block_dptrs(&pbi->mb_row_di[t].mbd);

        /* Argument record handed to thread_decoding_proc. */
        td->ithread = t;
        td->ptr1    = (void *)pbi;
        td->ptr2    = (void *)&pbi->mb_row_di[t];

        pthread_create(&pbi->h_decoding_thread[t], 0,
                       thread_decoding_proc, td);
    }

    sem_init(&pbi->h_event_end_decoding, 0, 0);

    pbi->allocated_decoding_thread_count = pbi->decoding_thread_count;
}
696 
697 
/* Free the first `rows` buffers referenced by *table, then the table
 * itself, and reset *table to NULL.  Does nothing when *table is NULL. */
static void mt_free_buffer_table(unsigned char ***table, int rows)
{
    unsigned char **entries = *table;
    int r;

    if (entries == NULL)
        return;

    for (r = 0; r < rows; ++r) {
        vpx_free(entries[r]);
        entries[r] = NULL;
    }

    vpx_free(entries);
    *table = NULL;
}

/*
 * Release the temporary buffers of the multithreaded decoder: the per-row
 * column counters plus the above-row and left-column buffer tables.
 *
 * pbi     - decoder instance; nothing happens unless b_multithreaded_rd
 *           is set.
 * mb_rows - number of row entries to free in each table.
 */
void vp8mt_de_alloc_temp_buffers(VP8D_COMP *pbi, int mb_rows)
{
    if (!pbi->b_multithreaded_rd)
        return;

    vpx_free(pbi->mt_current_mb_col);
    pbi->mt_current_mb_col = NULL;

    /* Free above_row buffers. */
    mt_free_buffer_table(&pbi->mt_yabove_row, mb_rows);
    mt_free_buffer_table(&pbi->mt_uabove_row, mb_rows);
    mt_free_buffer_table(&pbi->mt_vabove_row, mb_rows);

    /* Free left_col buffers. */
    mt_free_buffer_table(&pbi->mt_yleft_col, mb_rows);
    mt_free_buffer_table(&pbi->mt_uleft_col, mb_rows);
    mt_free_buffer_table(&pbi->mt_vleft_col, mb_rows);
}
776 
777 
/*
 * (Re)allocate the temporary buffers of the multithreaded decoder.
 *
 * pbi          - decoder instance; nothing happens unless
 *                b_multithreaded_rd is set.
 * width        - frame width in pixels; rounded up to a multiple of 16
 *                internally.
 * prev_mb_rows - row count the existing buffers were allocated with; they
 *                are released through vp8mt_de_alloc_temp_buffers() first.
 *
 * Also selects pbi->sync_range from the (rounded) width.  One set of
 * buffers is allocated per macroblock row (pc->mb_rows).
 */
void vp8mt_alloc_temp_buffers(VP8D_COMP *pbi, int width, int prev_mb_rows)
{
    VP8_COMMON *const pc = & pbi->common;
    int i;
    int uv_width;
    size_t y_row_size;
    size_t uv_row_size;

    if (pbi->b_multithreaded_rd)
    {
        vp8mt_de_alloc_temp_buffers(pbi, prev_mb_rows);

        /* our internal buffers are always multiples of 16 */
        if ((width & 0xf) != 0)
            width += 16 - (width & 0xf);

        if (width < 640) pbi->sync_range = 1;
        else if (width <= 1280) pbi->sync_range = 8;
        else if (width <= 2560) pbi->sync_range = 16;
        else pbi->sync_range = 32;

        uv_width = width >> 1;

        /* Row sizes include the border on both sides. */
        y_row_size = sizeof(unsigned char) * (width + (VP8BORDERINPIXELS << 1));
        uv_row_size = sizeof(unsigned char) * (uv_width + VP8BORDERINPIXELS);

        /* Allocate an int for each mb row. */
        CALLOC_ARRAY(pbi->mt_current_mb_col, pc->mb_rows);

        /* Allocate memory for above_row buffers.  vpx_memalign() hands back
         * uninitialised storage, so each row is cleared to keep any later
         * read of a not-yet-written byte well defined.  The NULL test
         * covers the case where CHECK_MEM_ERROR (defined outside this
         * file) returns after a failed allocation.
         */
        CALLOC_ARRAY(pbi->mt_yabove_row, pc->mb_rows);
        for (i = 0; i < pc->mb_rows; i++)
        {
            CHECK_MEM_ERROR(pbi->mt_yabove_row[i], vpx_memalign(16, y_row_size));
            if (pbi->mt_yabove_row[i])
                memset(pbi->mt_yabove_row[i], 0, y_row_size);
        }

        CALLOC_ARRAY(pbi->mt_uabove_row, pc->mb_rows);
        for (i = 0; i < pc->mb_rows; i++)
        {
            CHECK_MEM_ERROR(pbi->mt_uabove_row[i], vpx_memalign(16, uv_row_size));
            if (pbi->mt_uabove_row[i])
                memset(pbi->mt_uabove_row[i], 0, uv_row_size);
        }

        CALLOC_ARRAY(pbi->mt_vabove_row, pc->mb_rows);
        for (i = 0; i < pc->mb_rows; i++)
        {
            CHECK_MEM_ERROR(pbi->mt_vabove_row[i], vpx_memalign(16, uv_row_size));
            if (pbi->mt_vabove_row[i])
                memset(pbi->mt_vabove_row[i], 0, uv_row_size);
        }

        /* Allocate memory for left_col buffers (16 luma / 8 chroma bytes
         * per row, zero-filled by vpx_calloc).
         */
        CALLOC_ARRAY(pbi->mt_yleft_col, pc->mb_rows);
        for (i = 0; i < pc->mb_rows; i++)
            CHECK_MEM_ERROR(pbi->mt_yleft_col[i], vpx_calloc(sizeof(unsigned char) * 16, 1));

        CALLOC_ARRAY(pbi->mt_uleft_col, pc->mb_rows);
        for (i = 0; i < pc->mb_rows; i++)
            CHECK_MEM_ERROR(pbi->mt_uleft_col[i], vpx_calloc(sizeof(unsigned char) * 8, 1));

        CALLOC_ARRAY(pbi->mt_vleft_col, pc->mb_rows);
        for (i = 0; i < pc->mb_rows; i++)
            CHECK_MEM_ERROR(pbi->mt_vleft_col[i], vpx_calloc(sizeof(unsigned char) * 8, 1));
    }
}
829 
830 
/*
 * Shut down the macroblock decoding threads and release their bookkeeping.
 *
 * Does nothing unless pbi->b_multithreaded_rd is set.  Otherwise the flag is
 * cleared, every allocated worker is signalled on its start semaphore and
 * joined, the semaphores are destroyed, and the thread handle, start
 * semaphore, mb_row_di and de_thread_data arrays are freed and set to NULL.
 */
void vp8_decoder_remove_threads(VP8D_COMP *pbi)
{
    int t;

    /* Nothing to tear down when multithreaded decoding is not active. */
    if (!pbi->b_multithreaded_rd)
        return;

    pbi->b_multithreaded_rd = 0;

    /* Wake each worker so it can exit, then wait for it to finish. */
    for (t = 0; t < pbi->allocated_decoding_thread_count; ++t)
    {
        sem_post(&pbi->h_event_start_decoding[t]);
        pthread_join(pbi->h_decoding_thread[t], NULL);
    }

    /* All workers are joined; the semaphores can now be destroyed. */
    for (t = 0; t < pbi->allocated_decoding_thread_count; ++t)
        sem_destroy(&pbi->h_event_start_decoding[t]);

    sem_destroy(&pbi->h_event_end_decoding);

    vpx_free(pbi->h_decoding_thread);
    pbi->h_decoding_thread = NULL;

    vpx_free(pbi->h_event_start_decoding);
    pbi->h_event_start_decoding = NULL;

    vpx_free(pbi->mb_row_di);
    pbi->mb_row_di = NULL;

    vpx_free(pbi->de_thread_data);
    pbi->de_thread_data = NULL;
}
867 
/*
 * Decode one frame's macroblock rows using the worker threads plus the
 * calling thread.
 *
 * When the loop filter is enabled (pc->filter_level != 0) the per-row
 * above/left scratch buffers are seeded with the 127/129 border values and
 * the loop filter is initialized for the frame; otherwise the top line of
 * the new frame buffer is set up directly.  The workers are then started,
 * the caller runs mt_decode_mb_rows() itself, and finally waits on
 * h_event_end_decoding.
 */
void vp8mt_decode_mb_rows( VP8D_COMP *pbi, MACROBLOCKD *xd)
{
    VP8_COMMON *pc = &pbi->common;
    YV12_BUFFER_CONFIG *yv12_fb_new = pbi->dec_fb_ref[INTRA_FRAME];
    int filter_level = pc->filter_level;
    unsigned int t;
    int row;

    if (!filter_level)
    {
        vp8_setup_intra_recon_top_line(yv12_fb_new);
    }
    else
    {
        /* Offsets of the byte just before the visible row start. */
        const int y_off  = VP8BORDERINPIXELS - 1;
        const int uv_off = (VP8BORDERINPIXELS >> 1) - 1;

        /* Set above_row buffer to 127 for decoding first MB row */
        memset(pbi->mt_yabove_row[0] + y_off, 127,
               yv12_fb_new->y_width + 5);
        memset(pbi->mt_uabove_row[0] + uv_off, 127,
               (yv12_fb_new->y_width >> 1) + 5);
        memset(pbi->mt_vabove_row[0] + uv_off, 127,
               (yv12_fb_new->y_width >> 1) + 5);

        /* Remaining rows only need their leading byte set to 129. */
        for (row = 1; row < pc->mb_rows; ++row)
        {
            memset(pbi->mt_yabove_row[row] + y_off, (unsigned char)129, 1);
            memset(pbi->mt_uabove_row[row] + uv_off, (unsigned char)129, 1);
            memset(pbi->mt_vabove_row[row] + uv_off, (unsigned char)129, 1);
        }

        /* Set left_col to 129 initially */
        for (row = 0; row < pc->mb_rows; ++row)
        {
            memset(pbi->mt_yleft_col[row], (unsigned char)129, 16);
            memset(pbi->mt_uleft_col[row], (unsigned char)129, 8);
            memset(pbi->mt_vleft_col[row], (unsigned char)129, 8);
        }

        /* Initialize the loop filter for this frame. */
        vp8_loop_filter_frame_init(pc, &pbi->mb, filter_level);
    }

    setup_decoding_thread_data(pbi, xd, pbi->mb_row_di, pbi->decoding_thread_count);

    /* Release every worker thread for this frame. */
    for (t = 0; t < pbi->decoding_thread_count; ++t)
    {
        sem_post(&pbi->h_event_start_decoding[t]);
    }

    /* The calling thread decodes rows as well. */
    mt_decode_mb_rows(pbi, xd, 0);

    sem_wait(&pbi->h_event_end_decoding);   /* add back for each frame */
}
914