1 /*
2  *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include "./vp9_rtcd.h"
12 #include "./vpx_config.h"
13 #include "./vpx_dsp_rtcd.h"
14 
15 #include "vpx_dsp/quantize.h"
16 #include "vpx_mem/vpx_mem.h"
17 #include "vpx_ports/mem.h"
18 
19 #include "vp9/common/vp9_idct.h"
20 #include "vp9/common/vp9_reconinter.h"
21 #include "vp9/common/vp9_reconintra.h"
22 #include "vp9/common/vp9_scan.h"
23 
24 #include "vp9/encoder/vp9_encodemb.h"
25 #include "vp9/encoder/vp9_rd.h"
26 #include "vp9/encoder/vp9_tokenize.h"
27 
// Per-plane entropy-context scratch storage used by vp9_encode_sb().
// vp9_encode_sb() hands ta[plane] / tl[plane] to encode_block(), which
// indexes ta by transform-block column and tl by transform-block row.
struct optimize_ctx {
  ENTROPY_CONTEXT ta[MAX_MB_PLANE][16];  // indexed [plane][col]
  ENTROPY_CONTEXT tl[MAX_MB_PLANE][16];  // indexed [plane][row]
};
32 
// Fills x->plane[plane].src_diff for the given block size by handing the
// plane's source buffer and the plane's dst buffer to vpx_subtract_block()
// (or to the high-bitdepth variant when the current frame buffer is flagged
// as high bitdepth).
//
// x:     macroblock whose plane buffers are read and whose src_diff is written.
// bsize: block size before per-plane subsampling is applied.
// plane: plane index into x->plane[] / x->e_mbd.plane[].
void vp9_subtract_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) {
  struct macroblock_plane *const enc_plane = &x->plane[plane];
  const struct macroblockd_plane *const dec_plane = &x->e_mbd.plane[plane];
  const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, dec_plane);
  // The lookup tables count 4x4 units; multiply by 4 to get sample counts.
  const int width = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
  const int height = 4 * num_4x4_blocks_high_lookup[plane_bsize];

#if CONFIG_VP9_HIGHBITDEPTH
  if (x->e_mbd.cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    vpx_highbd_subtract_block(height, width, enc_plane->src_diff, width,
                              enc_plane->src.buf, enc_plane->src.stride,
                              dec_plane->dst.buf, dec_plane->dst.stride,
                              x->e_mbd.bd);
    return;
  }
#endif  // CONFIG_VP9_HIGHBITDEPTH

  // The residual is stored densely: its stride equals the block width.
  vpx_subtract_block(height, width, enc_plane->src_diff, width,
                     enc_plane->src.buf, enc_plane->src.stride,
                     dec_plane->dst.buf, dec_plane->dst.stride);
}
51 
52 static const int plane_rd_mult[REF_TYPES][PLANE_TYPES] = {
53   { 10, 6 },
54   { 8, 5 },
55 };
56 
// Right shift that rounds toward zero: a negative 'num' is negated, shifted,
// and negated again instead of being shifted directly.
// 'num' can be negative, but 'shift' must be non-negative.
// 'num' is expanded more than once, so it must not have side effects.
// The whole expansion is parenthesized so the macro can be used as an operand
// of a larger expression (e.g. `1 + RIGHT_SHIFT_POSSIBLY_NEGATIVE(a, b)`)
// without the surrounding operators binding to the conditional's condition.
#define RIGHT_SHIFT_POSSIBLY_NEGATIVE(num, shift) \
  (((num) >= 0) ? (num) >> (shift) : -((-(num)) >> (shift)))
60 
// Greedy rate-distortion optimization of the quantized coefficients of one
// transform block.
//
// Walking the coefficients in scan order, every nonzero quantized value is
// compared against the same value with its magnitude reduced by one, and the
// candidate with the lower RD cost is kept. In parallel the function tracks
// which scan position gives the lowest total cost if the block were ended
// right after it. qcoeff, dqcoeff and eobs[block] are updated in place.
//
// mb:      macroblock providing coeff/qcoeff/eobs (mb->plane[plane]) and
//          dqcoeff/dequant (mb->e_mbd.plane[plane]).
// plane:   plane index.
// block:   block index used with BLOCK_OFFSET and eobs[].
// tx_size: transform size; selects scan order, band table and token costs.
// ctx:     entropy context used for the first coefficient.
// Returns the new end-of-block position, which is also stored in
// mb->plane[plane].eobs[block].
int vp9_optimize_b(MACROBLOCK *mb, int plane, int block, TX_SIZE tx_size,
                   int ctx) {
  MACROBLOCKD *const xd = &mb->e_mbd;
  struct macroblock_plane *const p = &mb->plane[plane];
  struct macroblockd_plane *const pd = &xd->plane[plane];
  const int ref = is_inter_block(xd->mi[0]);
  uint8_t token_cache[1024];
  const tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
  tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
  const int eob = p->eobs[block];
  const PLANE_TYPE plane_type = get_plane_type(plane);
  // Upper bound on eob (asserted below): 16 << (2 * tx_size).
  const int default_eob = 16 << (tx_size << 1);
  // Extra scaling of one bit is applied only for the 32x32 transform.
  const int shift = (tx_size == TX_32X32);
  const int16_t *const dequant_ptr = pd->dequant;
  const uint8_t *const band_translate = get_band_translate(tx_size);
  const scan_order *const so = get_scan(xd, tx_size, plane_type, block);
  const int16_t *const scan = so->scan;
  const int16_t *const nb = so->neighbors;
  const MODE_INFO *mbmi = xd->mi[0];
  const int sharpness = mb->sharpness;
  const int64_t rdadj = (int64_t)mb->rdmult * plane_rd_mult[ref][plane_type];
  // With sharpness == 0 the adjusted multiplier is halved; otherwise it is
  // scaled by (8 - sharpness + segment_id) / 16.
  const int64_t rdmult =
      (sharpness == 0 ? rdadj >> 1
                      : (rdadj * (8 - sharpness + mbmi->segment_id)) >> 4);

  const int64_t rddiv = mb->rddiv;
  int64_t rd_cost0, rd_cost1;
  int64_t rate0, rate1;
  int16_t t0, t1;
  int i, final_eob;
  // Counts coefficients with magnitude > 1 seen since the best eob was last
  // moved; only incremented when sharpness > 0.
  int count_high_values_after_eob = 0;
#if CONFIG_VP9_HIGHBITDEPTH
  const uint16_t *cat6_high_cost = vp9_get_high_cost_table(xd->bd);
#else
  const uint16_t *cat6_high_cost = vp9_get_high_cost_table(8);
#endif
  unsigned int(*const token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] =
      mb->token_costs[tx_size][plane_type][ref];
  unsigned int(*token_costs_cur)[2][COEFF_CONTEXTS][ENTROPY_TOKENS];
  int64_t eob_cost0, eob_cost1;
  const int ctx0 = ctx;
  int64_t accu_rate = 0;
  // Initialized to the worst possible error for the largest transform size.
  // This ensures that it never goes negative.
  int64_t accu_error = ((int64_t)1) << 50;
  int64_t best_block_rd_cost = INT64_MAX;
  int x_prev = 1;
  // Quantized / de-quantized value chosen for the coefficient just before the
  // best eob position; written back after the search (see the end).
  tran_low_t before_best_eob_qc = 0;
  tran_low_t before_best_eob_dqc = 0;

  assert((!plane_type && !plane) || (plane_type && plane));
  assert(eob <= default_eob);

  // Seed the token cache with the energy class of every coefficient that is
  // currently inside the block.
  for (i = 0; i < eob; i++) {
    const int rc = scan[i];
    token_cache[rc] = vp9_pt_energy_class[vp9_get_token(qcoeff[rc])];
  }
  final_eob = 0;

  // Initial RD cost.
  // This is the cost of signalling EOB at position 0, i.e. an empty block.
  token_costs_cur = token_costs + band_translate[0];
  rate0 = (*token_costs_cur)[0][ctx0][EOB_TOKEN];
  best_block_rd_cost = RDCOST(rdmult, rddiv, rate0, accu_error);

  // For each token, pick one of two choices greedily:
  // (i) First candidate: Keep current quantized value, OR
  // (ii) Second candidate: Reduce quantized value by 1.
  for (i = 0; i < eob; i++) {
    const int rc = scan[i];
    const int x = qcoeff[rc];
    const int band_cur = band_translate[i];
    const int ctx_cur = (i == 0) ? ctx : get_coef_context(nb, token_cache, i);
    // Second index of the cost table: 1 when the previous value was zero.
    const int token_tree_sel_cur = (x_prev == 0);
    token_costs_cur = token_costs + band_cur;
    if (x == 0) {  // No need to search
      const int token = vp9_get_token(x);
      rate0 = (*token_costs_cur)[token_tree_sel_cur][ctx_cur][token];
      accu_rate += rate0;
      x_prev = 0;
      // Note: accu_error does not change.
    } else {
      // dequant entry 0 is used for raster position 0, entry 1 for all others.
      const int dqv = dequant_ptr[rc != 0];
      // Compute the distortion for quantizing to 0.
      const int diff_for_zero_raw = (0 - coeff[rc]) * (1 << shift);
      const int diff_for_zero =
#if CONFIG_VP9_HIGHBITDEPTH
          (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
              ? RIGHT_SHIFT_POSSIBLY_NEGATIVE(diff_for_zero_raw, xd->bd - 8)
              :
#endif
              diff_for_zero_raw;
      const int64_t distortion_for_zero =
          (int64_t)diff_for_zero * diff_for_zero;

      // Compute the distortion for the first candidate
      const int diff0_raw = (dqcoeff[rc] - coeff[rc]) * (1 << shift);
      const int diff0 =
#if CONFIG_VP9_HIGHBITDEPTH
          (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
              ? RIGHT_SHIFT_POSSIBLY_NEGATIVE(diff0_raw, xd->bd - 8)
              :
#endif  // CONFIG_VP9_HIGHBITDEPTH
              diff0_raw;
      const int64_t distortion0 = (int64_t)diff0 * diff0;

      // Compute the distortion for the second candidate
      const int sign = -(x < 0);        // -1 if x is negative and 0 otherwise.
      const int x1 = x - 2 * sign - 1;  // abs(x1) = abs(x) - 1.
      int64_t distortion1;
      if (x1 != 0) {
        const int dqv_step =
#if CONFIG_VP9_HIGHBITDEPTH
            (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? dqv >> (xd->bd - 8)
                                                          :
#endif  // CONFIG_VP9_HIGHBITDEPTH
                                                          dqv;
        // (v + sign) ^ sign negates v when sign is -1 and leaves it unchanged
        // when sign is 0, so the step carries the sign of x.
        const int diff_step = (dqv_step + sign) ^ sign;
        const int diff1 = diff0 - diff_step;
        assert(dqv > 0);  // We aren't right shifting a negative number above.
        distortion1 = (int64_t)diff1 * diff1;
      } else {
        // Reducing a magnitude of 1 gives zero, whose distortion is known.
        distortion1 = distortion_for_zero;
      }
      {
        // Calculate RDCost for current coeff for the two candidates.
        const int64_t base_bits0 = vp9_get_token_cost(x, &t0, cat6_high_cost);
        const int64_t base_bits1 = vp9_get_token_cost(x1, &t1, cat6_high_cost);
        rate0 =
            base_bits0 + (*token_costs_cur)[token_tree_sel_cur][ctx_cur][t0];
        rate1 =
            base_bits1 + (*token_costs_cur)[token_tree_sel_cur][ctx_cur][t1];
      }
      {
        int rdcost_better_for_x1, eob_rdcost_better_for_x1;
        int dqc0, dqc1;
        int64_t best_eob_cost_cur;
        int use_x1;

        // Calculate RD Cost effect on the next coeff for the two candidates.
        int64_t next_bits0 = 0;
        int64_t next_bits1 = 0;
        int64_t next_eob_bits0 = 0;
        int64_t next_eob_bits1 = 0;
        // Only look ahead when a following scan position exists.
        if (i < default_eob - 1) {
          int ctx_next, token_tree_sel_next;
          const int band_next = band_translate[i + 1];
          const int token_next =
              (i + 1 != eob) ? vp9_get_token(qcoeff[scan[i + 1]]) : EOB_TOKEN;
          unsigned int(*const token_costs_next)[2][COEFF_CONTEXTS]
                                               [ENTROPY_TOKENS] =
                                                   token_costs + band_next;
          // Temporarily store each candidate's energy class so that the
          // context of the next position reflects that candidate.
          token_cache[rc] = vp9_pt_energy_class[t0];
          ctx_next = get_coef_context(nb, token_cache, i + 1);
          token_tree_sel_next = (x == 0);
          next_bits0 =
              (*token_costs_next)[token_tree_sel_next][ctx_next][token_next];
          next_eob_bits0 =
              (*token_costs_next)[token_tree_sel_next][ctx_next][EOB_TOKEN];
          token_cache[rc] = vp9_pt_energy_class[t1];
          ctx_next = get_coef_context(nb, token_cache, i + 1);
          token_tree_sel_next = (x1 == 0);
          next_bits1 =
              (*token_costs_next)[token_tree_sel_next][ctx_next][token_next];
          if (x1 != 0) {
            next_eob_bits1 =
                (*token_costs_next)[token_tree_sel_next][ctx_next][EOB_TOKEN];
          }
        }

        // Compare the total RD costs for two candidates.
        rd_cost0 = RDCOST(rdmult, rddiv, (rate0 + next_bits0), distortion0);
        rd_cost1 = RDCOST(rdmult, rddiv, (rate1 + next_bits1), distortion1);
        rdcost_better_for_x1 = (rd_cost1 < rd_cost0);
        // Cost of the whole block if it ended right after this coefficient.
        eob_cost0 = RDCOST(rdmult, rddiv, (accu_rate + rate0 + next_eob_bits0),
                           (accu_error + distortion0 - distortion_for_zero));
        eob_cost1 = eob_cost0;
        if (x1 != 0) {
          eob_cost1 =
              RDCOST(rdmult, rddiv, (accu_rate + rate1 + next_eob_bits1),
                     (accu_error + distortion1 - distortion_for_zero));
          eob_rdcost_better_for_x1 = (eob_cost1 < eob_cost0);
        } else {
          // A zero cannot be the last coefficient before eob.
          eob_rdcost_better_for_x1 = 0;
        }

        // Calculate the two candidate de-quantized values.
        dqc0 = dqcoeff[rc];
        dqc1 = 0;
        // dqc1 is only needed if the second candidate wins one of the two
        // comparisons above.
        if (rdcost_better_for_x1 + eob_rdcost_better_for_x1) {
          if (x1 != 0) {
            dqc1 = RIGHT_SHIFT_POSSIBLY_NEGATIVE(x1 * dqv, shift);
          } else {
            dqc1 = 0;
          }
        }

        // Pick and record the better quantized and de-quantized values.
        if (rdcost_better_for_x1) {
          qcoeff[rc] = x1;
          dqcoeff[rc] = dqc1;
          accu_rate += rate1;
          accu_error += distortion1 - distortion_for_zero;
          assert(distortion1 <= distortion_for_zero);
          token_cache[rc] = vp9_pt_energy_class[t1];
        } else {
          accu_rate += rate0;
          accu_error += distortion0 - distortion_for_zero;
          assert(distortion0 <= distortion_for_zero);
          token_cache[rc] = vp9_pt_energy_class[t0];
        }
        if (sharpness > 0 && abs(qcoeff[rc]) > 1) count_high_values_after_eob++;
        assert(accu_error >= 0);
        x_prev = qcoeff[rc];  // Update based on selected quantized value.

        use_x1 = (x1 != 0) && eob_rdcost_better_for_x1;
        best_eob_cost_cur = use_x1 ? eob_cost1 : eob_cost0;

        // Determine whether to move the eob position to i+1
        if (best_eob_cost_cur < best_block_rd_cost) {
          best_block_rd_cost = best_eob_cost_cur;
          final_eob = i + 1;
          count_high_values_after_eob = 0;
          if (use_x1) {
            before_best_eob_qc = x1;
            before_best_eob_dqc = dqc1;
          } else {
            before_best_eob_qc = x;
            before_best_eob_dqc = dqc0;
          }
        }
      }
    }
  }
  if (count_high_values_after_eob > 0) {
    // Coefficients with magnitude > 1 follow the best eob (sharpness > 0):
    // do not truncate; set the eob just past the last nonzero coefficient.
    final_eob = eob - 1;
    for (; final_eob >= 0; final_eob--) {
      const int rc = scan[final_eob];
      const int x = qcoeff[rc];
      if (x) {
        break;
      }
    }
    final_eob++;
  } else {
    assert(final_eob <= eob);
    if (final_eob > 0) {
      int rc;
      assert(before_best_eob_qc != 0);
      // Restore the value that was assumed for the last coefficient when the
      // best eob cost was computed; it may differ from the greedy pick.
      i = final_eob - 1;
      rc = scan[i];
      qcoeff[rc] = before_best_eob_qc;
      dqcoeff[rc] = before_best_eob_dqc;
    }
    // Zero out everything from the new eob up to the old one.
    for (i = final_eob; i < eob; i++) {
      int rc = scan[i];
      qcoeff[rc] = 0;
      dqcoeff[rc] = 0;
    }
  }
  mb->plane[plane].eobs[block] = final_eob;
  return final_eob;
}
// The shift helper is not used by any code after vp9_optimize_b().
#undef RIGHT_SHIFT_POSSIBLY_NEGATIVE
325 
fdct32x32(int rd_transform,const int16_t * src,tran_low_t * dst,int src_stride)326 static INLINE void fdct32x32(int rd_transform, const int16_t *src,
327                              tran_low_t *dst, int src_stride) {
328   if (rd_transform)
329     vpx_fdct32x32_rd(src, dst, src_stride);
330   else
331     vpx_fdct32x32(src, dst, src_stride);
332 }
333 
#if CONFIG_VP9_HIGHBITDEPTH
// High-bitdepth counterpart of fdct32x32(): a nonzero 'rd_transform' selects
// vpx_highbd_fdct32x32_rd(); zero selects vpx_highbd_fdct32x32().
static INLINE void highbd_fdct32x32(int rd_transform, const int16_t *src,
                                    tran_low_t *dst, int src_stride) {
  if (!rd_transform) {
    vpx_highbd_fdct32x32(src, dst, src_stride);
    return;
  }
  vpx_highbd_fdct32x32_rd(src, dst, src_stride);
}
#endif  // CONFIG_VP9_HIGHBITDEPTH
343 
// Forward-transforms one residual block and quantizes it with the "fp"
// quantizer family (vp9_quantize_fp*, vp9_highbd_quantize_fp*, or the fused
// vp9_fdct8x8_quant for 8x8 blocks in the non-high-bitdepth path).
//
// The residual is read from x->plane[plane].src_diff at 4x4-unit position
// (row, col). Transform output goes to coeff, quantizer output to qcoeff and
// dqcoeff, and the end-of-block position to eobs[block].
// The caller must not invoke this when x->skip_block is set.
void vp9_xform_quant_fp(MACROBLOCK *x, int plane, int block, int row, int col,
                        BLOCK_SIZE plane_bsize, TX_SIZE tx_size) {
  MACROBLOCKD *const xd = &x->e_mbd;
  const struct macroblock_plane *const p = &x->plane[plane];
  const struct macroblockd_plane *const pd = &xd->plane[plane];
  const scan_order *const so = &vp9_default_scan_orders[tx_size];
  tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
  tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
  uint16_t *const eob = &p->eobs[block];
  const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
  const int16_t *const src_diff = &p->src_diff[4 * (row * diff_stride + col)];
  // skip block condition should be handled before this is called.
  assert(!x->skip_block);

#if CONFIG_VP9_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    int n_coeffs;
    if (tx_size == TX_32X32) {
      // 32x32 has its own quantizer entry point.
      highbd_fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
      vp9_highbd_quantize_fp_32x32(coeff, 1024, x->skip_block, p->round_fp,
                                   p->quant_fp, qcoeff, dqcoeff, pd->dequant,
                                   eob, so->scan, so->iscan);
      return;
    }
    // The remaining sizes share one quantizer and differ only in the
    // transform and the coefficient count.
    if (tx_size == TX_16X16) {
      vpx_highbd_fdct16x16(src_diff, coeff, diff_stride);
      n_coeffs = 256;
    } else if (tx_size == TX_8X8) {
      vpx_highbd_fdct8x8(src_diff, coeff, diff_stride);
      n_coeffs = 64;
    } else {
      assert(tx_size == TX_4X4);
      x->fwd_txfm4x4(src_diff, coeff, diff_stride);
      n_coeffs = 16;
    }
    vp9_highbd_quantize_fp(coeff, n_coeffs, x->skip_block, p->round_fp,
                           p->quant_fp, qcoeff, dqcoeff, pd->dequant, eob,
                           so->scan, so->iscan);
    return;
  }
#endif  // CONFIG_VP9_HIGHBITDEPTH

  if (tx_size == TX_32X32) {
    fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
    vp9_quantize_fp_32x32(coeff, 1024, x->skip_block, p->round_fp,
                          p->quant_fp, qcoeff, dqcoeff, pd->dequant, eob,
                          so->scan, so->iscan);
  } else if (tx_size == TX_8X8) {
    // 8x8 uses a single call that performs both transform and quantization.
    vp9_fdct8x8_quant(src_diff, diff_stride, coeff, 64, x->skip_block,
                      p->round_fp, p->quant_fp, qcoeff, dqcoeff, pd->dequant,
                      eob, so->scan, so->iscan);
  } else {
    int n_coeffs;
    if (tx_size == TX_16X16) {
      vpx_fdct16x16(src_diff, coeff, diff_stride);
      n_coeffs = 256;
    } else {
      assert(tx_size == TX_4X4);
      x->fwd_txfm4x4(src_diff, coeff, diff_stride);
      n_coeffs = 16;
    }
    vp9_quantize_fp(coeff, n_coeffs, x->skip_block, p->round_fp, p->quant_fp,
                    qcoeff, dqcoeff, pd->dequant, eob, so->scan, so->iscan);
  }
}
420 
// DC-only variant of the transform/quantize step: calls the "_1" forward
// transforms (or x->fwd_txfm4x4 for 4x4) followed by the *_quantize_dc
// quantizers, using p->round, p->quant_fp[0] and pd->dequant[0].
//
// The residual is read from x->plane[plane].src_diff at 4x4-unit position
// (row, col); results go to coeff, qcoeff, dqcoeff and eobs[block].
// The caller must not invoke this when x->skip_block is set.
void vp9_xform_quant_dc(MACROBLOCK *x, int plane, int block, int row, int col,
                        BLOCK_SIZE plane_bsize, TX_SIZE tx_size) {
  MACROBLOCKD *const xd = &x->e_mbd;
  const struct macroblock_plane *const p = &x->plane[plane];
  const struct macroblockd_plane *const pd = &xd->plane[plane];
  tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
  tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
  uint16_t *const eob = &p->eobs[block];
  const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
  const int16_t *const src_diff = &p->src_diff[4 * (row * diff_stride + col)];
  int n_coeffs;
  // skip block condition should be handled before this is called.
  assert(!x->skip_block);

#if CONFIG_VP9_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    if (tx_size == TX_32X32) {
      // The 32x32 quantizer takes no coefficient count.
      vpx_highbd_fdct32x32_1(src_diff, coeff, diff_stride);
      vpx_highbd_quantize_dc_32x32(coeff, x->skip_block, p->round,
                                   p->quant_fp[0], qcoeff, dqcoeff,
                                   pd->dequant[0], eob);
      return;
    }
    if (tx_size == TX_16X16) {
      vpx_highbd_fdct16x16_1(src_diff, coeff, diff_stride);
      n_coeffs = 256;
    } else if (tx_size == TX_8X8) {
      vpx_highbd_fdct8x8_1(src_diff, coeff, diff_stride);
      n_coeffs = 64;
    } else {
      assert(tx_size == TX_4X4);
      x->fwd_txfm4x4(src_diff, coeff, diff_stride);
      n_coeffs = 16;
    }
    vpx_highbd_quantize_dc(coeff, n_coeffs, x->skip_block, p->round,
                           p->quant_fp[0], qcoeff, dqcoeff, pd->dequant[0],
                           eob);
    return;
  }
#endif  // CONFIG_VP9_HIGHBITDEPTH

  if (tx_size == TX_32X32) {
    vpx_fdct32x32_1(src_diff, coeff, diff_stride);
    vpx_quantize_dc_32x32(coeff, x->skip_block, p->round, p->quant_fp[0],
                          qcoeff, dqcoeff, pd->dequant[0], eob);
    return;
  }
  if (tx_size == TX_16X16) {
    vpx_fdct16x16_1(src_diff, coeff, diff_stride);
    n_coeffs = 256;
  } else if (tx_size == TX_8X8) {
    vpx_fdct8x8_1(src_diff, coeff, diff_stride);
    n_coeffs = 64;
  } else {
    assert(tx_size == TX_4X4);
    x->fwd_txfm4x4(src_diff, coeff, diff_stride);
    n_coeffs = 16;
  }
  vpx_quantize_dc(coeff, n_coeffs, x->skip_block, p->round, p->quant_fp[0],
                  qcoeff, dqcoeff, pd->dequant[0], eob);
}
493 
// Full forward transform followed by the "b" quantizers (vpx_quantize_b*,
// vpx_highbd_quantize_b*), which take zbin, round, quant and quant_shift.
//
// The residual is read from x->plane[plane].src_diff at 4x4-unit position
// (row, col); results go to coeff, qcoeff, dqcoeff and eobs[block].
// The caller must not invoke this when x->skip_block is set.
void vp9_xform_quant(MACROBLOCK *x, int plane, int block, int row, int col,
                     BLOCK_SIZE plane_bsize, TX_SIZE tx_size) {
  MACROBLOCKD *const xd = &x->e_mbd;
  const struct macroblock_plane *const p = &x->plane[plane];
  const struct macroblockd_plane *const pd = &xd->plane[plane];
  const scan_order *const so = &vp9_default_scan_orders[tx_size];
  tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
  tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
  uint16_t *const eob = &p->eobs[block];
  const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
  const int16_t *const src_diff = &p->src_diff[4 * (row * diff_stride + col)];
  int n_coeffs;
  // skip block condition should be handled before this is called.
  assert(!x->skip_block);

#if CONFIG_VP9_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    if (tx_size == TX_32X32) {
      // 32x32 has its own quantizer entry point.
      highbd_fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
      vpx_highbd_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin,
                                  p->round, p->quant, p->quant_shift, qcoeff,
                                  dqcoeff, pd->dequant, eob, so->scan,
                                  so->iscan);
      return;
    }
    if (tx_size == TX_16X16) {
      vpx_highbd_fdct16x16(src_diff, coeff, diff_stride);
      n_coeffs = 256;
    } else if (tx_size == TX_8X8) {
      vpx_highbd_fdct8x8(src_diff, coeff, diff_stride);
      n_coeffs = 64;
    } else {
      assert(tx_size == TX_4X4);
      x->fwd_txfm4x4(src_diff, coeff, diff_stride);
      n_coeffs = 16;
    }
    vpx_highbd_quantize_b(coeff, n_coeffs, x->skip_block, p->zbin, p->round,
                          p->quant, p->quant_shift, qcoeff, dqcoeff,
                          pd->dequant, eob, so->scan, so->iscan);
    return;
  }
#endif  // CONFIG_VP9_HIGHBITDEPTH

  if (tx_size == TX_32X32) {
    fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
    vpx_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin, p->round,
                         p->quant, p->quant_shift, qcoeff, dqcoeff,
                         pd->dequant, eob, so->scan, so->iscan);
    return;
  }
  // The remaining sizes share one quantizer and differ only in the transform
  // and the coefficient count.
  if (tx_size == TX_16X16) {
    vpx_fdct16x16(src_diff, coeff, diff_stride);
    n_coeffs = 256;
  } else if (tx_size == TX_8X8) {
    vpx_fdct8x8(src_diff, coeff, diff_stride);
    n_coeffs = 64;
  } else {
    assert(tx_size == TX_4X4);
    x->fwd_txfm4x4(src_diff, coeff, diff_stride);
    n_coeffs = 16;
  }
  vpx_quantize_b(coeff, n_coeffs, x->skip_block, p->zbin, p->round, p->quant,
                 p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob, so->scan,
                 so->iscan);
}
576 
// Per-transform-block callback used by vp9_encode_sb().
//
// Steps, in order:
//  1. Early out with eob = 0 when the block is flagged in x->zcoeff_blk
//     (plane 0 only).
//  2. Unless x->skip_recode is set, run the forward transform and
//     quantization, choosing the variant from x->quant_fp and x->skip_txfm.
//  3. Optionally run vp9_optimize_b(), then update the entropy contexts.
//  4. Clear *args->skip when the block has coefficients.
//  5. Unless x->skip_encode is set or eob is 0, apply the inverse transform
//     of dqcoeff to the plane's dst buffer.
//
// arg points to a struct encode_b_args.
static void encode_block(int plane, int block, int row, int col,
                         BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg) {
  struct encode_b_args *const args = arg;
  MACROBLOCK *const x = args->x;
  MACROBLOCKD *const xd = &x->e_mbd;
  struct macroblock_plane *const p = &x->plane[plane];
  struct macroblockd_plane *const pd = &xd->plane[plane];
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
  uint8_t *const dst = &pd->dst.buf[4 * row * pd->dst.stride + 4 * col];
  ENTROPY_CONTEXT *const above = &args->ta[col];
  ENTROPY_CONTEXT *const left = &args->tl[row];

  // TODO(jingning): per transformed block zero forcing only enabled for
  // luma component. will integrate chroma components as well.
  if (x->zcoeff_blk[tx_size][block] && plane == 0) {
    p->eobs[block] = 0;
    *above = *left = 0;
    return;
  }

  if (!x->skip_recode) {
    if (x->quant_fp) {
      // Encoding process for rtc mode
      if (x->skip_txfm[0] == SKIP_TXFM_AC_DC && plane == 0) {
        // skip forward transform
        p->eobs[block] = 0;
        *above = *left = 0;
        return;
      }
      vp9_xform_quant_fp(x, plane, block, row, col, plane_bsize, tx_size);
    } else if (max_txsize_lookup[plane_bsize] != tx_size) {
      // Not the largest transform for this block size: always do the full
      // transform and quantization.
      vp9_xform_quant(x, plane, block, row, col, plane_bsize, tx_size);
    } else {
      const int txfm_blk_index = (plane << 2) + (block >> (tx_size << 1));
      if (x->skip_txfm[txfm_blk_index] == SKIP_TXFM_NONE) {
        // full forward transform and quantization
        vp9_xform_quant(x, plane, block, row, col, plane_bsize, tx_size);
      } else if (x->skip_txfm[txfm_blk_index] == SKIP_TXFM_AC_ONLY) {
        // fast path forward transform and quantization
        vp9_xform_quant_dc(x, plane, block, row, col, plane_bsize, tx_size);
      } else {
        // skip forward transform
        p->eobs[block] = 0;
        *above = *left = 0;
        return;
      }
    }
  }

  // Both contexts become 1 when the block ends up with coefficients, else 0.
  if (x->optimize && (!x->skip_recode || !x->skip_optimize)) {
    const int ctx = combine_entropy_contexts(*above, *left);
    *above = *left = vp9_optimize_b(x, plane, block, tx_size, ctx) > 0;
  } else {
    *above = *left = p->eobs[block] > 0;
  }

  if (p->eobs[block]) *(args->skip) = 0;

  if (x->skip_encode || p->eobs[block] == 0) return;

#if CONFIG_VP9_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    uint16_t *const dst16 = CONVERT_TO_SHORTPTR(dst);
    if (tx_size == TX_32X32) {
      vp9_highbd_idct32x32_add(dqcoeff, dst16, pd->dst.stride, p->eobs[block],
                               xd->bd);
    } else if (tx_size == TX_16X16) {
      vp9_highbd_idct16x16_add(dqcoeff, dst16, pd->dst.stride, p->eobs[block],
                               xd->bd);
    } else if (tx_size == TX_8X8) {
      vp9_highbd_idct8x8_add(dqcoeff, dst16, pd->dst.stride, p->eobs[block],
                             xd->bd);
    } else {
      assert(tx_size == TX_4X4);
      // this is like vp9_short_idct4x4 but has a special case around eob<=1
      // which is significant (not just an optimization) for the lossless
      // case.
      x->highbd_inv_txfm_add(dqcoeff, dst16, pd->dst.stride, p->eobs[block],
                             xd->bd);
    }
    return;
  }
#endif  // CONFIG_VP9_HIGHBITDEPTH

  if (tx_size == TX_32X32) {
    vp9_idct32x32_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
  } else if (tx_size == TX_16X16) {
    vp9_idct16x16_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
  } else if (tx_size == TX_8X8) {
    vp9_idct8x8_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
  } else {
    assert(tx_size == TX_4X4);
    // this is like vp9_short_idct4x4 but has a special case around eob<=1
    // which is significant (not just an optimization) for the lossless
    // case.
    x->inv_txfm_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
  }
}
689 
// Per-transform-block callback used by vp9_encode_sby_pass1().
// Runs vp9_xform_quant() on the block and, when the resulting eob is nonzero,
// applies x->inv_txfm_add (or the high-bitdepth variant) to add the
// de-quantized coefficients back into the plane's dst buffer.
// arg is the MACROBLOCK being encoded.
static void encode_block_pass1(int plane, int block, int row, int col,
                               BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
                               void *arg) {
  MACROBLOCK *const x = (MACROBLOCK *)arg;
  MACROBLOCKD *const xd = &x->e_mbd;
  struct macroblock_plane *const p = &x->plane[plane];
  struct macroblockd_plane *const pd = &xd->plane[plane];
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
  uint8_t *const recon = &pd->dst.buf[4 * row * pd->dst.stride + 4 * col];

  vp9_xform_quant(x, plane, block, row, col, plane_bsize, tx_size);

  // An eob of 0 means there is nothing to inverse-transform.
  if (p->eobs[block] == 0) return;

#if CONFIG_VP9_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    x->highbd_inv_txfm_add(dqcoeff, CONVERT_TO_SHORTPTR(recon),
                           pd->dst.stride, p->eobs[block], xd->bd);
    return;
  }
#endif  // CONFIG_VP9_HIGHBITDEPTH
  x->inv_txfm_add(dqcoeff, recon, pd->dst.stride, p->eobs[block]);
}
714 
// Encodes plane 0 of the block: computes its residual with
// vp9_subtract_plane(), then runs encode_block_pass1() on every transform
// block of that plane.
void vp9_encode_sby_pass1(MACROBLOCK *x, BLOCK_SIZE bsize) {
  MACROBLOCKD *const xd = &x->e_mbd;

  vp9_subtract_plane(x, bsize, 0);
  vp9_foreach_transformed_block_in_plane(xd, bsize, 0, encode_block_pass1, x);
}
720 
// Encodes every plane of a block through encode_block().
//
// The mode info's skip flag is set to 1 up front and a pointer to it is handed
// to the per-block callback. When x->skip is set the function returns right
// after that. For each plane the residual is recomputed unless x->skip_recode
// is set, and coefficient optimization is enabled only when x->optimize is set
// and skip_recode/skip_optimize are not both set.
void vp9_encode_sb(MACROBLOCK *x, BLOCK_SIZE bsize) {
  MACROBLOCKD *const mbd = &x->e_mbd;
  MODE_INFO *const mode_info = mbd->mi[0];
  struct optimize_ctx entropy;
  struct encode_b_args args = { x, 1, NULL, NULL, &mode_info->skip };
  int plane;

  mode_info->skip = 1;

  if (x->skip) return;

  for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
    ENTROPY_CONTEXT *const above = entropy.ta[plane];
    ENTROPY_CONTEXT *const left = entropy.tl[plane];
    int optimize_coeffs;

    if (!x->skip_recode) vp9_subtract_plane(x, bsize, plane);

    optimize_coeffs = x->optimize && !(x->skip_recode && x->skip_optimize);
    if (optimize_coeffs) {
      // The entropy contexts are only populated when they will be consumed.
      const struct macroblockd_plane *const pd = &mbd->plane[plane];
      const TX_SIZE tx_size =
          plane == 0 ? mode_info->tx_size : get_uv_tx_size(mode_info, pd);
      vp9_get_entropy_contexts(bsize, tx_size, pd, above, left);
    }

    args.enable_coeff_opt = optimize_coeffs;
    args.ta = above;
    args.tl = left;

    vp9_foreach_transformed_block_in_plane(mbd, bsize, plane, encode_block,
                                           &args);
  }
}
751 
// Per-transform-block callback that intra-predicts and encodes one block.
//
// |arg| is a struct encode_b_args. The function:
//   1. picks the prediction mode, transform type and scan order for tx_size,
//   2. calls vp9_predict_intra_block() to write the prediction into dst,
//   3. unless x->skip_recode is set, subtracts the prediction from the source,
//      forward-transforms the residual and quantizes it,
//   4. when args->enable_coeff_opt is set and x->skip_recode is not, runs
//      vp9_optimize_b() and stores whether its result is positive into the
//      above/left entropy contexts,
//   5. unless x->skip_encode is set, adds the inverse transform into dst when
//      the end-of-block position is non-zero,
//   6. clears *args->skip when the end-of-block position is non-zero.
// When CONFIG_VP9_HIGHBITDEPTH is enabled and the current buffer carries
// YV12_FLAG_HIGHBITDEPTH, the highbd variants of steps 3-5 are used instead.
void vp9_encode_block_intra(int plane, int block, int row, int col,
                            BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
                            void *arg) {
  struct encode_b_args *const args = arg;
  MACROBLOCK *const x = args->x;
  MACROBLOCKD *const xd = &x->e_mbd;
  MODE_INFO *mi = xd->mi[0];
  struct macroblock_plane *const p = &x->plane[plane];
  struct macroblockd_plane *const pd = &xd->plane[plane];
  tran_low_t *coeff = BLOCK_OFFSET(p->coeff, block);
  tran_low_t *qcoeff = BLOCK_OFFSET(p->qcoeff, block);
  tran_low_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
  const scan_order *scan_order;
  // Stays DCT_DCT for TX_32X32, which never reassigns it below.
  TX_TYPE tx_type = DCT_DCT;
  PREDICTION_MODE mode;
  const int bwl = b_width_log2_lookup[plane_bsize];
  // Stride of the residual buffer; presumably the plane block width in
  // pixels -- confirm against b_width_log2_lookup.
  const int diff_stride = 4 * (1 << bwl);
  uint8_t *src, *dst;
  int16_t *src_diff;
  uint16_t *eob = &p->eobs[block];
  const int src_stride = p->src.stride;
  const int dst_stride = pd->dst.stride;
  // a/l are only set (and only dereferenced) when coefficient optimization is
  // enabled.
  ENTROPY_CONTEXT *a = NULL;
  ENTROPY_CONTEXT *l = NULL;
  int entropy_ctx = 0;
  // row and col are scaled by 4 to address pixels within each buffer.
  dst = &pd->dst.buf[4 * (row * dst_stride + col)];
  src = &p->src.buf[4 * (row * src_stride + col)];
  src_diff = &p->src_diff[4 * (row * diff_stride + col)];
  if (args->enable_coeff_opt) {
    a = &args->ta[col];
    l = &args->tl[row];
    entropy_ctx = combine_entropy_contexts(*a, *l);
  }

  // Select prediction mode, transform type and scan order. 4x4 blocks use a
  // per-block transform type and (for plane 0) a per-block mode; 32x32 blocks
  // use the default scan order.
  if (tx_size == TX_4X4) {
    tx_type = get_tx_type_4x4(get_plane_type(plane), xd, block);
    scan_order = &vp9_scan_orders[TX_4X4][tx_type];
    mode = plane == 0 ? get_y_mode(xd->mi[0], block) : mi->uv_mode;
  } else {
    mode = plane == 0 ? mi->mode : mi->uv_mode;
    if (tx_size == TX_32X32) {
      scan_order = &vp9_default_scan_orders[TX_32X32];
    } else {
      tx_type = get_tx_type(get_plane_type(plane), xd);
      scan_order = &vp9_scan_orders[tx_size][tx_type];
    }
  }

  // When skip_encode or fp_src_pred is set, the source buffer and stride are
  // passed in place of dst/dst_stride for the fifth and sixth arguments
  // (presumably the prediction reference -- confirm against
  // vp9_predict_intra_block). The prediction itself is always written to dst.
  vp9_predict_intra_block(
      xd, bwl, tx_size, mode, (x->skip_encode || x->fp_src_pred) ? src : dst,
      (x->skip_encode || x->fp_src_pred) ? src_stride : dst_stride, dst,
      dst_stride, col, row, plane);

  // skip block condition should be handled before this is called.
  assert(!x->skip_block);

#if CONFIG_VP9_HIGHBITDEPTH
  // High bit depth path: same structure as the switch further down, using the
  // highbd_* routines and passing xd->bd. Returns without falling through.
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    uint16_t *const dst16 = CONVERT_TO_SHORTPTR(dst);
    switch (tx_size) {
      case TX_32X32:
        if (!x->skip_recode) {
          vpx_highbd_subtract_block(32, 32, src_diff, diff_stride, src,
                                    src_stride, dst, dst_stride, xd->bd);
          highbd_fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
          vpx_highbd_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin,
                                      p->round, p->quant, p->quant_shift,
                                      qcoeff, dqcoeff, pd->dequant, eob,
                                      scan_order->scan, scan_order->iscan);
        }
        if (args->enable_coeff_opt && !x->skip_recode) {
          *a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
        }
        if (!x->skip_encode && *eob) {
          vp9_highbd_idct32x32_add(dqcoeff, dst16, dst_stride, *eob, xd->bd);
        }
        break;
      case TX_16X16:
        if (!x->skip_recode) {
          vpx_highbd_subtract_block(16, 16, src_diff, diff_stride, src,
                                    src_stride, dst, dst_stride, xd->bd);
          if (tx_type == DCT_DCT)
            vpx_highbd_fdct16x16(src_diff, coeff, diff_stride);
          else
            vp9_highbd_fht16x16(src_diff, coeff, diff_stride, tx_type);
          vpx_highbd_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round,
                                p->quant, p->quant_shift, qcoeff, dqcoeff,
                                pd->dequant, eob, scan_order->scan,
                                scan_order->iscan);
        }
        if (args->enable_coeff_opt && !x->skip_recode) {
          *a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
        }
        if (!x->skip_encode && *eob) {
          vp9_highbd_iht16x16_add(tx_type, dqcoeff, dst16, dst_stride, *eob,
                                  xd->bd);
        }
        break;
      case TX_8X8:
        if (!x->skip_recode) {
          vpx_highbd_subtract_block(8, 8, src_diff, diff_stride, src,
                                    src_stride, dst, dst_stride, xd->bd);
          if (tx_type == DCT_DCT)
            vpx_highbd_fdct8x8(src_diff, coeff, diff_stride);
          else
            vp9_highbd_fht8x8(src_diff, coeff, diff_stride, tx_type);
          vpx_highbd_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round,
                                p->quant, p->quant_shift, qcoeff, dqcoeff,
                                pd->dequant, eob, scan_order->scan,
                                scan_order->iscan);
        }
        if (args->enable_coeff_opt && !x->skip_recode) {
          *a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
        }
        if (!x->skip_encode && *eob) {
          vp9_highbd_iht8x8_add(tx_type, dqcoeff, dst16, dst_stride, *eob,
                                xd->bd);
        }
        break;
      default:
        assert(tx_size == TX_4X4);
        if (!x->skip_recode) {
          vpx_highbd_subtract_block(4, 4, src_diff, diff_stride, src,
                                    src_stride, dst, dst_stride, xd->bd);
          // The DCT_DCT case goes through the forward-transform function
          // pointer stored on the MACROBLOCK.
          if (tx_type != DCT_DCT)
            vp9_highbd_fht4x4(src_diff, coeff, diff_stride, tx_type);
          else
            x->fwd_txfm4x4(src_diff, coeff, diff_stride);
          vpx_highbd_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round,
                                p->quant, p->quant_shift, qcoeff, dqcoeff,
                                pd->dequant, eob, scan_order->scan,
                                scan_order->iscan);
        }
        if (args->enable_coeff_opt && !x->skip_recode) {
          *a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
        }
        if (!x->skip_encode && *eob) {
          if (tx_type == DCT_DCT) {
            // this is like vp9_short_idct4x4 but has a special case around
            // eob<=1 which is significant (not just an optimization) for the
            // lossless case.
            x->highbd_inv_txfm_add(dqcoeff, dst16, dst_stride, *eob, xd->bd);
          } else {
            vp9_highbd_iht4x4_16_add(dqcoeff, dst16, dst_stride, tx_type,
                                     xd->bd);
          }
        }
        break;
    }
    // Any coded coefficient means the block cannot be marked as skipped.
    if (*eob) *(args->skip) = 0;
    return;
  }
#endif  // CONFIG_VP9_HIGHBITDEPTH

  // Path taken when the high bit depth branch above is compiled out or not
  // selected. Each case follows the same three stages: residual + forward
  // transform + quantize, optional coefficient optimization, inverse
  // transform added into dst.
  switch (tx_size) {
    case TX_32X32:
      if (!x->skip_recode) {
        vpx_subtract_block(32, 32, src_diff, diff_stride, src, src_stride, dst,
                           dst_stride);
        fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
        vpx_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin, p->round,
                             p->quant, p->quant_shift, qcoeff, dqcoeff,
                             pd->dequant, eob, scan_order->scan,
                             scan_order->iscan);
      }
      if (args->enable_coeff_opt && !x->skip_recode) {
        *a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
      }
      if (!x->skip_encode && *eob)
        vp9_idct32x32_add(dqcoeff, dst, dst_stride, *eob);
      break;
    case TX_16X16:
      if (!x->skip_recode) {
        vpx_subtract_block(16, 16, src_diff, diff_stride, src, src_stride, dst,
                           dst_stride);
        vp9_fht16x16(src_diff, coeff, diff_stride, tx_type);
        vpx_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round, p->quant,
                       p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob,
                       scan_order->scan, scan_order->iscan);
      }
      if (args->enable_coeff_opt && !x->skip_recode) {
        *a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
      }
      if (!x->skip_encode && *eob)
        vp9_iht16x16_add(tx_type, dqcoeff, dst, dst_stride, *eob);
      break;
    case TX_8X8:
      if (!x->skip_recode) {
        vpx_subtract_block(8, 8, src_diff, diff_stride, src, src_stride, dst,
                           dst_stride);
        vp9_fht8x8(src_diff, coeff, diff_stride, tx_type);
        vpx_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round, p->quant,
                       p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob,
                       scan_order->scan, scan_order->iscan);
      }
      if (args->enable_coeff_opt && !x->skip_recode) {
        *a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
      }
      if (!x->skip_encode && *eob)
        vp9_iht8x8_add(tx_type, dqcoeff, dst, dst_stride, *eob);
      break;
    default:
      assert(tx_size == TX_4X4);
      if (!x->skip_recode) {
        vpx_subtract_block(4, 4, src_diff, diff_stride, src, src_stride, dst,
                           dst_stride);
        // The DCT_DCT case goes through the forward-transform function
        // pointer stored on the MACROBLOCK.
        if (tx_type != DCT_DCT)
          vp9_fht4x4(src_diff, coeff, diff_stride, tx_type);
        else
          x->fwd_txfm4x4(src_diff, coeff, diff_stride);
        vpx_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round, p->quant,
                       p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob,
                       scan_order->scan, scan_order->iscan);
      }
      if (args->enable_coeff_opt && !x->skip_recode) {
        *a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
      }
      if (!x->skip_encode && *eob) {
        if (tx_type == DCT_DCT)
          // this is like vp9_short_idct4x4 but has a special case around eob<=1
          // which is significant (not just an optimization) for the lossless
          // case.
          x->inv_txfm_add(dqcoeff, dst, dst_stride, *eob);
        else
          vp9_iht4x4_16_add(dqcoeff, dst, dst_stride, tx_type);
      }
      break;
  }
  // Any coded coefficient means the block cannot be marked as skipped.
  if (*eob) *(args->skip) = 0;
}
982 
// Runs vp9_encode_block_intra() over every transform block of one plane.
//
// Coefficient optimization is requested through enable_optimize_b, but it is
// forced off when x->optimize is clear or when both x->skip_recode and
// x->skip_optimize are set. The above/left entropy contexts are populated
// only when optimization stays enabled.
void vp9_encode_intra_block_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane,
                                  int enable_optimize_b) {
  const MACROBLOCKD *const mbd = &x->e_mbd;
  struct optimize_ctx entropy;
  struct encode_b_args args = { x, enable_optimize_b, entropy.ta[plane],
                                entropy.tl[plane], &mbd->mi[0]->skip };

  if (!enable_optimize_b || !x->optimize ||
      (x->skip_recode && x->skip_optimize)) {
    args.enable_coeff_opt = 0;
  } else {
    const struct macroblockd_plane *const pd = &mbd->plane[plane];
    const TX_SIZE tx_size =
        plane == 0 ? mbd->mi[0]->tx_size : get_uv_tx_size(mbd->mi[0], pd);
    vp9_get_entropy_contexts(bsize, tx_size, pd, entropy.ta[plane],
                             entropy.tl[plane]);
  }

  vp9_foreach_transformed_block_in_plane(mbd, bsize, plane,
                                         vp9_encode_block_intra, &args);
}
1003