1 /*
2  * Copyright (c) 2017, Alliance for Open Media. All rights reserved
3  *
4  * This source code is subject to the terms of the BSD 2 Clause License and
5  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6  * was not distributed with this source code in the LICENSE file, you can
7  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8  * Media Patent License 1.0 was not distributed with this source code in the
9  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10  */
11 
12 #include "av1/encoder/encodetxb.h"
13 
14 #include "aom_ports/mem.h"
15 #include "av1/common/blockd.h"
16 #include "av1/common/idct.h"
17 #include "av1/common/pred_common.h"
18 #include "av1/common/scan.h"
19 #include "av1/encoder/bitstream.h"
20 #include "av1/encoder/cost.h"
21 #include "av1/encoder/encodeframe.h"
22 #include "av1/encoder/hash.h"
23 #include "av1/encoder/rdopt.h"
24 #include "av1/encoder/tokenize.h"
25 
// File-scope state and sizing constants for the "hbt" hash table declared
// further down in this file.
// NOTE(review): the routines that consume these values (e.g. the
// hbt_create_hashes mentioned below) are outside the visible part of the file.
static int hbt_needs_init = 1;  // starts at 1; presumably cleared once set up
static CRC32C crc_calculator;
static const int HBT_EOB = 16;            // also the length in opt_qcoeff
static const int HBT_TABLE_SIZE = 65536;  // 16 bit: holds 65536 'arrays'
static const int HBT_ARRAY_LENGTH = 256;  // 8 bit: 256 entries
// If removed in hbt_create_hashes or increased beyond int8_t, widen deltas type
static const int HBT_KICKOUT = 3;
33 
// Element type of hbt_hash_table.
// NOTE(review): the code that fills and reads these entries is outside the
// visible part of the file; the per-field notes are inferred from names and
// should be confirmed there.
typedef struct OptTxbQcoeff {
  // Use larger type if larger/no kickout value is used in hbt_create_hashes
  int8_t deltas[16];      // 16 slots, the same count as HBT_EOB
  uint32_t hbt_qc_hash;   // presumably a hash over quantized coefficients
  uint32_t hbt_ctx_hash;  // presumably a hash over the coding context
  int init;               // presumably non-zero once the entry holds data
  int rate_cost;
} OptTxbQcoeff;
42 
// Pointer to OptTxbQcoeff storage; nothing in the visible code allocates or
// frees it. Declared without `static`, so it has external linkage.
// NOTE(review): make it `static` if no other translation unit refers to it.
OptTxbQcoeff *hbt_hash_table;
44 
// Rate/distortion bookkeeping for one coefficient when comparing its current
// quantized level against the level one step closer to zero. Filled in by
// get_dist_cost_stats() and get_dist_cost_stats_with_eob().
typedef struct LevelDownStats {
  int update;          // reset to 0 by get_dist_cost_stats()
  tran_low_t low_qc;   // current level moved one step toward zero
  tran_low_t low_dqc;  // dequantized value of low_qc
  int64_t dist0;       // distortion if the coefficient were quantized to 0
  int rate;            // cost of coding the current level
  int rate_low;        // cost of coding low_qc
  int64_t dist;        // distortion of the current level, minus dist0
  int64_t dist_low;    // distortion of low_qc, minus dist0 (0 if low_qc == 0)
  int64_t rd;          // RDCOST(rate, dist)
  int64_t rd_low;      // RDCOST(rate_low, dist_low)
  int64_t nz_rd;       // min(rd_low, rd) minus the best "is last coeff" RD cost
  int64_t rd_diff;     // not written by the code visible in this part of file
  int cost_diff;       // not written by the code visible in this part of file
  int64_t dist_diff;   // not written by the code visible in this part of file
  int new_eob;         // reset to -1 by get_dist_cost_stats()
} LevelDownStats;
62 
// Releases any previous coefficient buffer array of `cpi` and allocates a new
// 32-byte aligned one with (rows + 1) * (cols + 1) entries, where rows/cols
// are the frame's mi dimensions shifted right by mib_size_log2.
// Allocation failure is reported through CHECK_MEM_ERROR.
void av1_alloc_txb_buf(AV1_COMP *cpi) {
  AV1_COMMON *cm = &cpi->common;
  const int sb_log2 = cm->seq_params.mib_size_log2;
  const int sb_rows = (cm->mi_rows >> sb_log2) + 1;
  const int sb_cols = (cm->mi_cols >> sb_log2) + 1;
  int size = sb_rows * sb_cols;

  av1_free_txb_buf(cpi);
  // TODO(jingning): This should be further reduced.
  CHECK_MEM_ERROR(cm, cpi->coeff_buffer_base,
                  aom_memalign(32, sizeof(*cpi->coeff_buffer_base) * size));
}
73 
// Frees the coefficient buffer array owned by `cpi` and clears the pointer,
// so that a repeated call (or the free-then-allocate sequence in
// av1_alloc_txb_buf) can never free the same memory twice.
void av1_free_txb_buf(AV1_COMP *cpi) {
  aom_free(cpi->coeff_buffer_base);
  cpi->coeff_buffer_base = NULL;
}
75 
// Points x->mbmi_ext at the coefficient buffer entry selected by
// (mi_row, mi_col), both shifted right by mib_size_log2, and copies the
// macroblock's current cb_offset into it.
void av1_set_coeff_buffer(const AV1_COMP *const cpi, MACROBLOCK *const x,
                          int mi_row, int mi_col) {
  const AV1_COMMON *const cm = &cpi->common;
  const int sb_log2 = cm->seq_params.mib_size_log2;
  // Entries are laid out row-major with (mi_cols >> sb_log2) + 1 per row.
  const int entries_per_row = (cm->mi_cols >> sb_log2) + 1;
  const int entry_row = mi_row >> sb_log2;
  const int entry_col = mi_col >> sb_log2;
  const int entry = entry_row * entries_per_row + entry_col;

  x->mbmi_ext->cb_coef_buff = &cpi->coeff_buffer_base[entry];
  x->mbmi_ext->cb_offset = x->cb_offset;
  assert(x->cb_offset < (1 << num_pels_log2_lookup[cm->seq_params.sb_size]));
}
86 
// Writes `level + 1` to `w` as (num_bits - 1) zero bits followed by the
// num_bits-bit binary representation of `level + 1`, most significant bit
// first, where num_bits is the bit length of `level + 1`.
static void write_golomb(aom_writer *w, int level) {
  const int value = level + 1;
  int num_bits = 0;

  for (int rest = value; rest != 0; rest >>= 1) ++num_bits;
  assert(num_bits > 0);

  // Zero prefix announcing how many value bits follow.
  for (int k = 1; k < num_bits; ++k) aom_write_bit(w, 0);

  // The value itself, from the top bit down to bit 0.
  int shift = num_bits;
  while (shift-- > 0) aom_write_bit(w, (value >> shift) & 0x01);
}
102 
get_lower_coeff(tran_low_t qc)103 static INLINE tran_low_t get_lower_coeff(tran_low_t qc) {
104   if (qc == 0) {
105     return 0;
106   }
107   return qc > 0 ? qc - 1 : qc + 1;
108 }
109 
qcoeff_to_dqcoeff(tran_low_t qc,int coeff_idx,int dqv,int shift,const qm_val_t * iqmatrix)110 static INLINE tran_low_t qcoeff_to_dqcoeff(tran_low_t qc, int coeff_idx,
111                                            int dqv, int shift,
112                                            const qm_val_t *iqmatrix) {
113   int sign = qc < 0 ? -1 : 1;
114   if (iqmatrix != NULL)
115     dqv =
116         ((iqmatrix[coeff_idx] * dqv) + (1 << (AOM_QM_BITS - 1))) >> AOM_QM_BITS;
117   return sign * ((abs(qc) * dqv) >> shift);
118 }
119 
get_coeff_dist(tran_low_t tcoeff,tran_low_t dqcoeff,int shift)120 static INLINE int64_t get_coeff_dist(tran_low_t tcoeff, tran_low_t dqcoeff,
121                                      int shift) {
122   const int64_t diff = (tcoeff - dqcoeff) * (1 << shift);
123   const int64_t error = diff * diff;
124   return error;
125 }
126 
// EOB position token for each end-of-block value in 0..32; indexed directly
// by eob in get_eob_pos_token().
static const int8_t eob_to_pos_small[33] = {
  /* eob 0      */ 0,
  /* eob 1      */ 1,
  /* eob 2      */ 2,
  /* eob 3..4   */ 3, 3,
  /* eob 5..8   */ 4, 4, 4, 4,
  /* eob 9..16  */ 5, 5, 5, 5, 5, 5, 5, 5,
  /* eob 17..32 */ 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
};
134 
// EOB position token for end-of-block values of 33 and above; indexed by
// min((eob - 1) >> 5, 16) in get_eob_pos_token().
static const int8_t eob_to_pos_large[17] = {
  /* [0]     place holder */ 6,
  /* [1]     eob 33..64   */ 7,
  /* [2..3]  eob 65..128  */ 8,  8,
  /* [4..7]  eob 129..256 */ 9,  9,  9,  9,
  /* [8..15] eob 257..512 */ 10, 10, 10, 10, 10, 10, 10, 10,
  /* [16]    eob 513-     */ 11,
};
143 
get_eob_pos_token(const int eob,int * const extra)144 static INLINE int get_eob_pos_token(const int eob, int *const extra) {
145   int t;
146 
147   if (eob < 33) {
148     t = eob_to_pos_small[eob];
149   } else {
150     const int e = AOMMIN((eob - 1) >> 5, 16);
151     t = eob_to_pos_large[e];
152   }
153 
154   *extra = eob - k_eob_group_start[t];
155 
156   return t;
157 }
158 
159 #if CONFIG_ENTROPY_STATS
av1_update_eob_context(int cdf_idx,int eob,TX_SIZE tx_size,TX_CLASS tx_class,PLANE_TYPE plane,FRAME_CONTEXT * ec_ctx,FRAME_COUNTS * counts,uint8_t allow_update_cdf)160 void av1_update_eob_context(int cdf_idx, int eob, TX_SIZE tx_size,
161                             TX_CLASS tx_class, PLANE_TYPE plane,
162                             FRAME_CONTEXT *ec_ctx, FRAME_COUNTS *counts,
163                             uint8_t allow_update_cdf) {
164 #else
165 void av1_update_eob_context(int eob, TX_SIZE tx_size, TX_CLASS tx_class,
166                             PLANE_TYPE plane, FRAME_CONTEXT *ec_ctx,
167                             uint8_t allow_update_cdf) {
168 #endif
169   int eob_extra;
170   const int eob_pt = get_eob_pos_token(eob, &eob_extra);
171   TX_SIZE txs_ctx = get_txsize_entropy_ctx(tx_size);
172 
173   const int eob_multi_size = txsize_log2_minus4[tx_size];
174   const int eob_multi_ctx = (tx_class == TX_CLASS_2D) ? 0 : 1;
175 
176   switch (eob_multi_size) {
177     case 0:
178 #if CONFIG_ENTROPY_STATS
179       ++counts->eob_multi16[cdf_idx][plane][eob_multi_ctx][eob_pt - 1];
180 #endif
181       if (allow_update_cdf)
182         update_cdf(ec_ctx->eob_flag_cdf16[plane][eob_multi_ctx], eob_pt - 1, 5);
183       break;
184     case 1:
185 #if CONFIG_ENTROPY_STATS
186       ++counts->eob_multi32[cdf_idx][plane][eob_multi_ctx][eob_pt - 1];
187 #endif
188       if (allow_update_cdf)
189         update_cdf(ec_ctx->eob_flag_cdf32[plane][eob_multi_ctx], eob_pt - 1, 6);
190       break;
191     case 2:
192 #if CONFIG_ENTROPY_STATS
193       ++counts->eob_multi64[cdf_idx][plane][eob_multi_ctx][eob_pt - 1];
194 #endif
195       if (allow_update_cdf)
196         update_cdf(ec_ctx->eob_flag_cdf64[plane][eob_multi_ctx], eob_pt - 1, 7);
197       break;
198     case 3:
199 #if CONFIG_ENTROPY_STATS
200       ++counts->eob_multi128[cdf_idx][plane][eob_multi_ctx][eob_pt - 1];
201 #endif
202       if (allow_update_cdf) {
203         update_cdf(ec_ctx->eob_flag_cdf128[plane][eob_multi_ctx], eob_pt - 1,
204                    8);
205       }
206       break;
207     case 4:
208 #if CONFIG_ENTROPY_STATS
209       ++counts->eob_multi256[cdf_idx][plane][eob_multi_ctx][eob_pt - 1];
210 #endif
211       if (allow_update_cdf) {
212         update_cdf(ec_ctx->eob_flag_cdf256[plane][eob_multi_ctx], eob_pt - 1,
213                    9);
214       }
215       break;
216     case 5:
217 #if CONFIG_ENTROPY_STATS
218       ++counts->eob_multi512[cdf_idx][plane][eob_multi_ctx][eob_pt - 1];
219 #endif
220       if (allow_update_cdf) {
221         update_cdf(ec_ctx->eob_flag_cdf512[plane][eob_multi_ctx], eob_pt - 1,
222                    10);
223       }
224       break;
225     case 6:
226     default:
227 #if CONFIG_ENTROPY_STATS
228       ++counts->eob_multi1024[cdf_idx][plane][eob_multi_ctx][eob_pt - 1];
229 #endif
230       if (allow_update_cdf) {
231         update_cdf(ec_ctx->eob_flag_cdf1024[plane][eob_multi_ctx], eob_pt - 1,
232                    11);
233       }
234       break;
235   }
236 
237   if (k_eob_offset_bits[eob_pt] > 0) {
238     int eob_ctx = eob_pt - 3;
239     int eob_shift = k_eob_offset_bits[eob_pt] - 1;
240     int bit = (eob_extra & (1 << eob_shift)) ? 1 : 0;
241 #if CONFIG_ENTROPY_STATS
242     counts->eob_extra[cdf_idx][txs_ctx][plane][eob_pt][bit]++;
243 #endif  // CONFIG_ENTROPY_STATS
244     if (allow_update_cdf)
245       update_cdf(ec_ctx->eob_extra_cdf[txs_ctx][plane][eob_ctx], bit, 2);
246   }
247 }
248 
249 static int get_eob_cost(int eob, const LV_MAP_EOB_COST *txb_eob_costs,
250                         const LV_MAP_COEFF_COST *txb_costs, TX_CLASS tx_class) {
251   int eob_extra;
252   const int eob_pt = get_eob_pos_token(eob, &eob_extra);
253   int eob_cost = 0;
254   const int eob_multi_ctx = (tx_class == TX_CLASS_2D) ? 0 : 1;
255   eob_cost = txb_eob_costs->eob_cost[eob_multi_ctx][eob_pt - 1];
256 
257   if (k_eob_offset_bits[eob_pt] > 0) {
258     const int eob_ctx = eob_pt - 3;
259     const int eob_shift = k_eob_offset_bits[eob_pt] - 1;
260     const int bit = (eob_extra & (1 << eob_shift)) ? 1 : 0;
261     eob_cost += txb_costs->eob_extra_cost[eob_ctx][bit];
262     const int offset_bits = k_eob_offset_bits[eob_pt];
263     if (offset_bits > 1) eob_cost += av1_cost_literal(offset_bits - 1);
264   }
265   return eob_cost;
266 }
267 
268 static INLINE int get_sign_bit_cost(tran_low_t qc, int coeff_idx,
269                                     const int (*dc_sign_cost)[2],
270                                     int dc_sign_ctx) {
271   if (coeff_idx == 0) {
272     const int sign = (qc < 0) ? 1 : 0;
273     return dc_sign_cost[dc_sign_ctx][sign];
274   }
275   return av1_cost_literal(1);
276 }
277 
// golomb_bits_cost[r]: 512 * (2 * L - 1) for r in 1..31, where L is the bit
// length of r; entry 0 is 0.
static const int golomb_bits_cost[32] = {
  /* r = 0      */ 0,
  /* r = 1      */ 512,
  /* r = 2..3   */ 512 * 3, 512 * 3,
  /* r = 4..7   */ 512 * 5, 512 * 5, 512 * 5, 512 * 5,
  /* r = 8..15  */ 512 * 7, 512 * 7, 512 * 7, 512 * 7,
                   512 * 7, 512 * 7, 512 * 7, 512 * 7,
  /* r = 16..31 */ 512 * 9, 512 * 9, 512 * 9, 512 * 9,
                   512 * 9, 512 * 9, 512 * 9, 512 * 9,
                   512 * 9, 512 * 9, 512 * 9, 512 * 9,
                   512 * 9, 512 * 9, 512 * 9, 512 * 9,
};

// golomb_cost_diff[r]: golomb_bits_cost[r] - golomb_bits_cost[r - 1] for
// r >= 1 (non-zero only where r is a power of two); entry 0 is 0.
static const int golomb_cost_diff[32] = {
  /* r = 0      */ 0,
  /* r = 1      */ 512,
  /* r = 2..3   */ 512 * 2, 0,
  /* r = 4..7   */ 512 * 2, 0, 0, 0,
  /* r = 8..15  */ 512 * 2, 0, 0, 0, 0, 0, 0, 0,
  /* r = 16..31 */ 512 * 2, 0, 0, 0, 0, 0, 0, 0,
                   0,       0, 0, 0, 0, 0, 0, 0,
};
288 
289 static INLINE int get_golomb_cost(int abs_qc) {
290   if (abs_qc >= 1 + NUM_BASE_LEVELS + COEFF_BASE_RANGE) {
291     const int r = abs_qc - COEFF_BASE_RANGE - NUM_BASE_LEVELS;
292     const int length = get_msb(r) + 1;
293     return av1_cost_literal(2 * length - 1);
294   }
295   return 0;
296 }
297 
298 static INLINE int get_br_cost_with_diff(tran_low_t level, const int *coeff_lps,
299                                         int *diff) {
300   const int base_range = AOMMIN(level - 1 - NUM_BASE_LEVELS, COEFF_BASE_RANGE);
301   int golomb_bits = 0;
302   if (level <= COEFF_BASE_RANGE + 1 + NUM_BASE_LEVELS)
303     *diff += coeff_lps[base_range + COEFF_BASE_RANGE + 1];
304 
305   if (level >= COEFF_BASE_RANGE + 1 + NUM_BASE_LEVELS) {
306     int r = level - COEFF_BASE_RANGE - NUM_BASE_LEVELS;
307     if (r < 32) {
308       golomb_bits = golomb_bits_cost[r];
309       *diff += golomb_cost_diff[r];
310     } else {
311       golomb_bits = get_golomb_cost(level);
312       *diff += (r & (r - 1)) == 0 ? 1024 : 0;
313     }
314   }
315 
316   return coeff_lps[base_range] + golomb_bits;
317 }
318 
319 static INLINE int get_br_cost(tran_low_t level, const int *coeff_lps) {
320   const int base_range = AOMMIN(level - 1 - NUM_BASE_LEVELS, COEFF_BASE_RANGE);
321   return coeff_lps[base_range] + get_golomb_cost(level);
322 }
323 
324 static int get_coeff_cost(const tran_low_t qc, const int scan_idx,
325                           const int is_eob, const TxbInfo *const txb_info,
326                           const LV_MAP_COEFF_COST *const txb_costs,
327                           const int coeff_ctx, const TX_CLASS tx_class) {
328   const TXB_CTX *const txb_ctx = txb_info->txb_ctx;
329   const int is_nz = (qc != 0);
330   const tran_low_t abs_qc = abs(qc);
331   int cost = 0;
332   const int16_t *const scan = txb_info->scan_order->scan;
333   const int pos = scan[scan_idx];
334 
335   if (is_eob) {
336     cost += txb_costs->base_eob_cost[coeff_ctx][AOMMIN(abs_qc, 3) - 1];
337   } else {
338     cost += txb_costs->base_cost[coeff_ctx][AOMMIN(abs_qc, 3)];
339   }
340   if (is_nz) {
341     cost += get_sign_bit_cost(qc, scan_idx, txb_costs->dc_sign_cost,
342                               txb_ctx->dc_sign_ctx);
343 
344     if (abs_qc > NUM_BASE_LEVELS) {
345       const int ctx =
346           get_br_ctx(txb_info->levels, pos, txb_info->bwl, tx_class);
347       cost += get_br_cost(abs_qc, txb_costs->lps_cost[ctx]);
348     }
349   }
350   return cost;
351 }
352 
353 static INLINE int get_nz_map_ctx(const uint8_t *const levels,
354                                  const int coeff_idx, const int bwl,
355                                  const int height, const int scan_idx,
356                                  const int is_eob, const TX_SIZE tx_size,
357                                  const TX_CLASS tx_class) {
358   if (is_eob) {
359     if (scan_idx == 0) return 0;
360     if (scan_idx <= (height << bwl) / 8) return 1;
361     if (scan_idx <= (height << bwl) / 4) return 2;
362     return 3;
363   }
364   const int stats =
365       get_nz_mag(levels + get_padded_idx(coeff_idx, bwl), bwl, tx_class);
366   return get_nz_map_ctx_from_stats(stats, coeff_idx, bwl, tx_size, tx_class);
367 }
368 
369 static void get_dist_cost_stats(LevelDownStats *const stats, const int scan_idx,
370                                 const int is_eob,
371                                 const LV_MAP_COEFF_COST *const txb_costs,
372                                 const TxbInfo *const txb_info,
373                                 const TX_CLASS tx_class) {
374   const int16_t *const scan = txb_info->scan_order->scan;
375   const int coeff_idx = scan[scan_idx];
376   const tran_low_t qc = txb_info->qcoeff[coeff_idx];
377   const uint8_t *const levels = txb_info->levels;
378   stats->new_eob = -1;
379   stats->update = 0;
380   stats->rd_low = 0;
381   stats->rd = 0;
382   stats->nz_rd = 0;
383   stats->dist_low = 0;
384   stats->rate_low = 0;
385   stats->low_qc = 0;
386 
387   const tran_low_t tqc = txb_info->tcoeff[coeff_idx];
388   const int dqv = txb_info->dequant[coeff_idx != 0];
389   const int coeff_ctx =
390       get_nz_map_ctx(levels, coeff_idx, txb_info->bwl, txb_info->height,
391                      scan_idx, is_eob, txb_info->tx_size, tx_class);
392   const int qc_cost = get_coeff_cost(qc, scan_idx, is_eob, txb_info, txb_costs,
393                                      coeff_ctx, tx_class);
394   assert(qc != 0);
395   const tran_low_t dqc = qcoeff_to_dqcoeff(qc, coeff_idx, dqv, txb_info->shift,
396                                            txb_info->iqmatrix);
397   const int64_t dqc_dist = get_coeff_dist(tqc, dqc, txb_info->shift);
398 
399   // distortion difference when coefficient is quantized to 0
400   const tran_low_t dqc0 =
401       qcoeff_to_dqcoeff(0, coeff_idx, dqv, txb_info->shift, txb_info->iqmatrix);
402 
403   stats->dist0 = get_coeff_dist(tqc, dqc0, txb_info->shift);
404   stats->dist = dqc_dist - stats->dist0;
405   stats->rate = qc_cost;
406 
407   stats->rd = RDCOST(txb_info->rdmult, stats->rate, stats->dist);
408 
409   stats->low_qc = get_lower_coeff(qc);
410 
411   if (is_eob && stats->low_qc == 0) {
412     stats->rd_low = stats->rd;  // disable selection of low_qc in this case.
413   } else {
414     if (stats->low_qc == 0) {
415       stats->dist_low = 0;
416     } else {
417       stats->low_dqc = qcoeff_to_dqcoeff(stats->low_qc, coeff_idx, dqv,
418                                          txb_info->shift, txb_info->iqmatrix);
419       const int64_t low_dqc_dist =
420           get_coeff_dist(tqc, stats->low_dqc, txb_info->shift);
421       stats->dist_low = low_dqc_dist - stats->dist0;
422     }
423     const int low_qc_cost =
424         get_coeff_cost(stats->low_qc, scan_idx, is_eob, txb_info, txb_costs,
425                        coeff_ctx, tx_class);
426     stats->rate_low = low_qc_cost;
427     stats->rd_low = RDCOST(txb_info->rdmult, stats->rate_low, stats->dist_low);
428   }
429 }
430 
431 static void get_dist_cost_stats_with_eob(
432     LevelDownStats *const stats, const int scan_idx,
433     const LV_MAP_COEFF_COST *const txb_costs, const TxbInfo *const txb_info,
434     const TX_CLASS tx_class) {
435   const int is_eob = 0;
436   get_dist_cost_stats(stats, scan_idx, is_eob, txb_costs, txb_info, tx_class);
437 
438   const int16_t *const scan = txb_info->scan_order->scan;
439   const int coeff_idx = scan[scan_idx];
440   const tran_low_t qc = txb_info->qcoeff[coeff_idx];
441   const int coeff_ctx_temp = get_nz_map_ctx(
442       txb_info->levels, coeff_idx, txb_info->bwl, txb_info->height, scan_idx, 1,
443       txb_info->tx_size, tx_class);
444   const int qc_eob_cost = get_coeff_cost(qc, scan_idx, 1, txb_info, txb_costs,
445                                          coeff_ctx_temp, tx_class);
446   int64_t rd_eob = RDCOST(txb_info->rdmult, qc_eob_cost, stats->dist);
447   if (stats->low_qc != 0) {
448     const int low_qc_eob_cost =
449         get_coeff_cost(stats->low_qc, scan_idx, 1, txb_info, txb_costs,
450                        coeff_ctx_temp, tx_class);
451     int64_t rd_eob_low =
452         RDCOST(txb_info->rdmult, low_qc_eob_cost, stats->dist_low);
453     rd_eob = (rd_eob > rd_eob_low) ? rd_eob_low : rd_eob;
454   }
455 
456   stats->nz_rd = AOMMIN(stats->rd_low, stats->rd) - rd_eob;
457 }
458 
459 static INLINE void update_qcoeff(const int coeff_idx, const tran_low_t qc,
460                                  const TxbInfo *const txb_info) {
461   txb_info->qcoeff[coeff_idx] = qc;
462   txb_info->levels[get_padded_idx(coeff_idx, txb_info->bwl)] =
463       (uint8_t)clamp(abs(qc), 0, INT8_MAX);
464 }
465 
466 static INLINE void update_coeff(const int coeff_idx, const tran_low_t qc,
467                                 const TxbInfo *const txb_info) {
468   update_qcoeff(coeff_idx, qc, txb_info);
469   const int dqv = txb_info->dequant[coeff_idx != 0];
470   txb_info->dqcoeff[coeff_idx] = qcoeff_to_dqcoeff(
471       qc, coeff_idx, dqv, txb_info->shift, txb_info->iqmatrix);
472 }
473 
474 void av1_txb_init_levels_c(const tran_low_t *const coeff, const int width,
475                            const int height, uint8_t *const levels) {
476   const int stride = width + TX_PAD_HOR;
477   uint8_t *ls = levels;
478 
479   memset(levels + stride * height, 0,
480          sizeof(*levels) * (TX_PAD_BOTTOM * stride + TX_PAD_END));
481 
482   for (int i = 0; i < height; i++) {
483     for (int j = 0; j < width; j++) {
484       *ls++ = (uint8_t)clamp(abs(coeff[i * width + j]), 0, INT8_MAX);
485     }
486     for (int j = 0; j < TX_PAD_HOR; j++) {
487       *ls++ = 0;
488     }
489   }
490 }
491 
492 void av1_get_nz_map_contexts_c(const uint8_t *const levels,
493                                const int16_t *const scan, const uint16_t eob,
494                                const TX_SIZE tx_size, const TX_CLASS tx_class,
495                                int8_t *const coeff_contexts) {
496   const int bwl = get_txb_bwl(tx_size);
497   const int height = get_txb_high(tx_size);
498   for (int i = 0; i < eob; ++i) {
499     const int pos = scan[i];
500     coeff_contexts[pos] = get_nz_map_ctx(levels, pos, bwl, height, i,
501                                          i == eob - 1, tx_size, tx_class);
502   }
503 }
504 
505 void av1_write_coeffs_txb(const AV1_COMMON *const cm, MACROBLOCKD *xd,
506                           aom_writer *w, int blk_row, int blk_col, int plane,
507                           TX_SIZE tx_size, const tran_low_t *tcoeff,
508                           uint16_t eob, TXB_CTX *txb_ctx) {
509   const TX_SIZE txs_ctx = get_txsize_entropy_ctx(tx_size);
510   FRAME_CONTEXT *ec_ctx = xd->tile_ctx;
511   aom_write_symbol(w, eob == 0,
512                    ec_ctx->txb_skip_cdf[txs_ctx][txb_ctx->txb_skip_ctx], 2);
513   if (eob == 0) return;
514   const PLANE_TYPE plane_type = get_plane_type(plane);
515   const TX_TYPE tx_type = av1_get_tx_type(plane_type, xd, blk_row, blk_col,
516                                           tx_size, cm->reduced_tx_set_used);
517   const TX_CLASS tx_class = tx_type_to_class[tx_type];
518   const SCAN_ORDER *const scan_order = get_scan(tx_size, tx_type);
519   const int16_t *const scan = scan_order->scan;
520   int c;
521   const int bwl = get_txb_bwl(tx_size);
522   const int width = get_txb_wide(tx_size);
523   const int height = get_txb_high(tx_size);
524 
525   uint8_t levels_buf[TX_PAD_2D];
526   uint8_t *const levels = set_levels(levels_buf, width);
527   DECLARE_ALIGNED(16, int8_t, coeff_contexts[MAX_TX_SQUARE]);
528   av1_txb_init_levels(tcoeff, width, height, levels);
529 
530   av1_write_tx_type(cm, xd, blk_row, blk_col, plane, tx_size, w);
531 
532   int eob_extra;
533   const int eob_pt = get_eob_pos_token(eob, &eob_extra);
534   const int eob_multi_size = txsize_log2_minus4[tx_size];
535   const int eob_multi_ctx = (tx_class == TX_CLASS_2D) ? 0 : 1;
536   switch (eob_multi_size) {
537     case 0:
538       aom_write_symbol(w, eob_pt - 1,
539                        ec_ctx->eob_flag_cdf16[plane_type][eob_multi_ctx], 5);
540       break;
541     case 1:
542       aom_write_symbol(w, eob_pt - 1,
543                        ec_ctx->eob_flag_cdf32[plane_type][eob_multi_ctx], 6);
544       break;
545     case 2:
546       aom_write_symbol(w, eob_pt - 1,
547                        ec_ctx->eob_flag_cdf64[plane_type][eob_multi_ctx], 7);
548       break;
549     case 3:
550       aom_write_symbol(w, eob_pt - 1,
551                        ec_ctx->eob_flag_cdf128[plane_type][eob_multi_ctx], 8);
552       break;
553     case 4:
554       aom_write_symbol(w, eob_pt - 1,
555                        ec_ctx->eob_flag_cdf256[plane_type][eob_multi_ctx], 9);
556       break;
557     case 5:
558       aom_write_symbol(w, eob_pt - 1,
559                        ec_ctx->eob_flag_cdf512[plane_type][eob_multi_ctx], 10);
560       break;
561     default:
562       aom_write_symbol(w, eob_pt - 1,
563                        ec_ctx->eob_flag_cdf1024[plane_type][eob_multi_ctx], 11);
564       break;
565   }
566 
567   const int eob_offset_bits = k_eob_offset_bits[eob_pt];
568   if (eob_offset_bits > 0) {
569     const int eob_ctx = eob_pt - 3;
570     int eob_shift = eob_offset_bits - 1;
571     int bit = (eob_extra & (1 << eob_shift)) ? 1 : 0;
572     aom_write_symbol(w, bit,
573                      ec_ctx->eob_extra_cdf[txs_ctx][plane_type][eob_ctx], 2);
574     for (int i = 1; i < eob_offset_bits; i++) {
575       eob_shift = eob_offset_bits - 1 - i;
576       bit = (eob_extra & (1 << eob_shift)) ? 1 : 0;
577       aom_write_bit(w, bit);
578     }
579   }
580 
581   av1_get_nz_map_contexts(levels, scan, eob, tx_size, tx_class, coeff_contexts);
582 
583   for (c = eob - 1; c >= 0; --c) {
584     const int pos = scan[c];
585     const int coeff_ctx = coeff_contexts[pos];
586     const tran_low_t v = tcoeff[pos];
587     const tran_low_t level = abs(v);
588 
589     if (c == eob - 1) {
590       aom_write_symbol(
591           w, AOMMIN(level, 3) - 1,
592           ec_ctx->coeff_base_eob_cdf[txs_ctx][plane_type][coeff_ctx], 3);
593     } else {
594       aom_write_symbol(w, AOMMIN(level, 3),
595                        ec_ctx->coeff_base_cdf[txs_ctx][plane_type][coeff_ctx],
596                        4);
597     }
598     if (level > NUM_BASE_LEVELS) {
599       // level is above 1.
600       const int base_range = level - 1 - NUM_BASE_LEVELS;
601       const int br_ctx = get_br_ctx(levels, pos, bwl, tx_class);
602       aom_cdf_prob *cdf =
603           ec_ctx->coeff_br_cdf[AOMMIN(txs_ctx, TX_32X32)][plane_type][br_ctx];
604       for (int idx = 0; idx < COEFF_BASE_RANGE; idx += BR_CDF_SIZE - 1) {
605         const int k = AOMMIN(base_range - idx, BR_CDF_SIZE - 1);
606         aom_write_symbol(w, k, cdf, BR_CDF_SIZE);
607         if (k < BR_CDF_SIZE - 1) break;
608       }
609     }
610   }
611 
612   // Loop to code all signs in the transform block,
613   // starting with the sign of DC (if applicable)
614   for (c = 0; c < eob; ++c) {
615     const tran_low_t v = tcoeff[scan[c]];
616     const tran_low_t level = abs(v);
617     const int sign = (v < 0) ? 1 : 0;
618     if (level) {
619       if (c == 0) {
620         aom_write_symbol(
621             w, sign, ec_ctx->dc_sign_cdf[plane_type][txb_ctx->dc_sign_ctx], 2);
622       } else {
623         aom_write_bit(w, sign);
624       }
625       if (level > COEFF_BASE_RANGE + NUM_BASE_LEVELS)
626         write_golomb(w, level - COEFF_BASE_RANGE - 1 - NUM_BASE_LEVELS);
627     }
628   }
629 }
630 
// Bundles the common codec state, the macroblock being coded and the bit
// writer into one argument record.
// NOTE(review): no user of this struct appears in the visible part of the
// file; confirm it is still needed.
typedef struct encode_txb_args {
  const AV1_COMMON *cm;
  MACROBLOCK *x;
  aom_writer *w;
} ENCODE_TXB_ARGS;
636 
637 static void write_coeffs_txb_wrap(const AV1_COMMON *cm, MACROBLOCK *x,
638                                   aom_writer *w, int plane, int block,
639                                   int blk_row, int blk_col, TX_SIZE tx_size) {
640   MACROBLOCKD *xd = &x->e_mbd;
641   const int txb_offset =
642       x->mbmi_ext->cb_offset / (TX_SIZE_W_MIN * TX_SIZE_H_MIN);
643   tran_low_t *tcoeff_txb =
644       x->mbmi_ext->cb_coef_buff->tcoeff[plane] + x->mbmi_ext->cb_offset;
645   uint16_t *eob_txb = x->mbmi_ext->cb_coef_buff->eobs[plane] + txb_offset;
646   uint8_t *txb_skip_ctx_txb =
647       x->mbmi_ext->cb_coef_buff->txb_skip_ctx[plane] + txb_offset;
648   int *dc_sign_ctx_txb =
649       x->mbmi_ext->cb_coef_buff->dc_sign_ctx[plane] + txb_offset;
650   tran_low_t *tcoeff = BLOCK_OFFSET(tcoeff_txb, block);
651   uint16_t eob = eob_txb[block];
652   TXB_CTX txb_ctx = { txb_skip_ctx_txb[block], dc_sign_ctx_txb[block] };
653   av1_write_coeffs_txb(cm, xd, w, blk_row, blk_col, plane, tx_size, tcoeff, eob,
654                        &txb_ctx);
655 }
656 
657 void av1_write_coeffs_mb(const AV1_COMMON *const cm, MACROBLOCK *x, int mi_row,
658                          int mi_col, aom_writer *w, BLOCK_SIZE bsize) {
659   MACROBLOCKD *xd = &x->e_mbd;
660   const int num_planes = av1_num_planes(cm);
661   int block[MAX_MB_PLANE] = { 0 };
662   int row, col;
663   assert(bsize == get_plane_block_size(bsize, xd->plane[0].subsampling_x,
664                                        xd->plane[0].subsampling_y));
665   const int max_blocks_wide = max_block_wide(xd, bsize, 0);
666   const int max_blocks_high = max_block_high(xd, bsize, 0);
667   const BLOCK_SIZE max_unit_bsize = BLOCK_64X64;
668   int mu_blocks_wide = block_size_wide[max_unit_bsize] >> tx_size_wide_log2[0];
669   int mu_blocks_high = block_size_high[max_unit_bsize] >> tx_size_high_log2[0];
670   mu_blocks_wide = AOMMIN(max_blocks_wide, mu_blocks_wide);
671   mu_blocks_high = AOMMIN(max_blocks_high, mu_blocks_high);
672 
673   for (row = 0; row < max_blocks_high; row += mu_blocks_high) {
674     for (col = 0; col < max_blocks_wide; col += mu_blocks_wide) {
675       for (int plane = 0; plane < num_planes; ++plane) {
676         const struct macroblockd_plane *const pd = &xd->plane[plane];
677         if (!is_chroma_reference(mi_row, mi_col, bsize, pd->subsampling_x,
678                                  pd->subsampling_y))
679           continue;
680         const TX_SIZE tx_size = av1_get_tx_size(plane, xd);
681         const int stepr = tx_size_high_unit[tx_size];
682         const int stepc = tx_size_wide_unit[tx_size];
683         const int step = stepr * stepc;
684 
685         const int unit_height = ROUND_POWER_OF_TWO(
686             AOMMIN(mu_blocks_high + row, max_blocks_high), pd->subsampling_y);
687         const int unit_width = ROUND_POWER_OF_TWO(
688             AOMMIN(mu_blocks_wide + col, max_blocks_wide), pd->subsampling_x);
689         for (int blk_row = row >> pd->subsampling_y; blk_row < unit_height;
690              blk_row += stepr) {
691           for (int blk_col = col >> pd->subsampling_x; blk_col < unit_width;
692                blk_col += stepc) {
693             write_coeffs_txb_wrap(cm, x, w, plane, block[plane], blk_row,
694                                   blk_col, tx_size);
695             block[plane] += step;
696           }
697         }
698       }
699     }
700   }
701 }
702 
703 // TODO(angiebird): use this function whenever it's possible
704 static int get_tx_type_cost(const AV1_COMMON *cm, const MACROBLOCK *x,
705                             const MACROBLOCKD *xd, int plane, TX_SIZE tx_size,
706                             TX_TYPE tx_type) {
707   if (plane > 0) return 0;
708 
709   const TX_SIZE square_tx_size = txsize_sqr_map[tx_size];
710 
711   const MB_MODE_INFO *mbmi = xd->mi[0];
712   const int is_inter = is_inter_block(mbmi);
713   if (get_ext_tx_types(tx_size, is_inter, cm->reduced_tx_set_used) > 1 &&
714       !xd->lossless[xd->mi[0]->segment_id]) {
715     const int ext_tx_set =
716         get_ext_tx_set(tx_size, is_inter, cm->reduced_tx_set_used);
717     if (is_inter) {
718       if (ext_tx_set > 0)
719         return x->inter_tx_type_costs[ext_tx_set][square_tx_size][tx_type];
720     } else {
721       if (ext_tx_set > 0) {
722         PREDICTION_MODE intra_dir;
723         if (mbmi->filter_intra_mode_info.use_filter_intra)
724           intra_dir = fimode_to_intradir[mbmi->filter_intra_mode_info
725                                              .filter_intra_mode];
726         else
727           intra_dir = mbmi->mode;
728         return x->intra_tx_type_costs[ext_tx_set][square_tx_size][intra_dir]
729                                      [tx_type];
730       }
731     }
732   }
733   return 0;
734 }
735 
736 static AOM_FORCE_INLINE int warehouse_efficients_txb(
737     const AV1_COMMON *const cm, const MACROBLOCK *x, const int plane,
738     const int block, const TX_SIZE tx_size, const TXB_CTX *const txb_ctx,
739     const struct macroblock_plane *p, const int eob,
740     const PLANE_TYPE plane_type, const LV_MAP_COEFF_COST *const coeff_costs,
741     const MACROBLOCKD *const xd, const TX_TYPE tx_type,
742     const TX_CLASS tx_class) {
743   const tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
744   const int txb_skip_ctx = txb_ctx->txb_skip_ctx;
745   const int bwl = get_txb_bwl(tx_size);
746   const int width = get_txb_wide(tx_size);
747   const int height = get_txb_high(tx_size);
748   const SCAN_ORDER *const scan_order = get_scan(tx_size, tx_type);
749   const int16_t *const scan = scan_order->scan;
750   uint8_t levels_buf[TX_PAD_2D];
751   uint8_t *const levels = set_levels(levels_buf, width);
752   DECLARE_ALIGNED(16, int8_t, coeff_contexts[MAX_TX_SQUARE]);
753   const int eob_multi_size = txsize_log2_minus4[tx_size];
754   const LV_MAP_EOB_COST *const eob_costs =
755       &x->eob_costs[eob_multi_size][plane_type];
756   int cost = coeff_costs->txb_skip_cost[txb_skip_ctx][0];
757 
758   av1_txb_init_levels(qcoeff, width, height, levels);
759 
760   cost += get_tx_type_cost(cm, x, xd, plane, tx_size, tx_type);
761 
762   cost += get_eob_cost(eob, eob_costs, coeff_costs, tx_class);
763 
764   av1_get_nz_map_contexts(levels, scan, eob, tx_size, tx_class, coeff_contexts);
765 
766   const int(*lps_cost)[COEFF_BASE_RANGE + 1 + COEFF_BASE_RANGE + 1] =
767       coeff_costs->lps_cost;
768   int c = eob - 1;
769   {
770     const int pos = scan[c];
771     const tran_low_t v = qcoeff[pos];
772     const int sign = v >> 31;
773     const int level = (v ^ sign) - sign;
774     const int coeff_ctx = coeff_contexts[pos];
775     cost += coeff_costs->base_eob_cost[coeff_ctx][AOMMIN(level, 3) - 1];
776 
777     if (v) {
778       // sign bit cost
779       if (level > NUM_BASE_LEVELS) {
780         const int ctx = get_br_ctx_eob(pos, bwl, tx_class);
781         cost += get_br_cost(level, lps_cost[ctx]);
782       }
783       if (c) {
784         cost += av1_cost_literal(1);
785       } else {
786         const int sign01 = (sign ^ sign) - sign;
787         const int dc_sign_ctx = txb_ctx->dc_sign_ctx;
788         cost += coeff_costs->dc_sign_cost[dc_sign_ctx][sign01];
789         return cost;
790       }
791     }
792   }
793   const int(*base_cost)[8] = coeff_costs->base_cost;
794   for (c = eob - 2; c >= 1; --c) {
795     const int pos = scan[c];
796     const int coeff_ctx = coeff_contexts[pos];
797     const tran_low_t v = qcoeff[pos];
798     const int level = abs(v);
799     const int cost0 = base_cost[coeff_ctx][AOMMIN(level, 3)];
800     if (v) {
801       // sign bit cost
802       cost += av1_cost_literal(1);
803       if (level > NUM_BASE_LEVELS) {
804         const int ctx = get_br_ctx(levels, pos, bwl, tx_class);
805         cost += get_br_cost(level, lps_cost[ctx]);
806       }
807     }
808     cost += cost0;
809   }
810   if (c == 0) {
811     const int pos = scan[c];
812     const tran_low_t v = qcoeff[pos];
813     const int coeff_ctx = coeff_contexts[pos];
814     const int sign = v >> 31;
815     const int level = (v ^ sign) - sign;
816     cost += base_cost[coeff_ctx][AOMMIN(level, 3)];
817 
818     if (v) {
819       // sign bit cost
820       const int sign01 = (sign ^ sign) - sign;
821       const int dc_sign_ctx = txb_ctx->dc_sign_ctx;
822       cost += coeff_costs->dc_sign_cost[dc_sign_ctx][sign01];
823       if (level > NUM_BASE_LEVELS) {
824         const int ctx = get_br_ctx(levels, pos, bwl, tx_class);
825         cost += get_br_cost(level, lps_cost[ctx]);
826       }
827     }
828   }
829   return cost;
830 }
831 
832 int av1_cost_coeffs_txb(const AV1_COMMON *const cm, const MACROBLOCK *x,
833                         const int plane, const int block, const TX_SIZE tx_size,
834                         const TX_TYPE tx_type, const TXB_CTX *const txb_ctx) {
835   const struct macroblock_plane *p = &x->plane[plane];
836   const int eob = p->eobs[block];
837   const TX_SIZE txs_ctx = get_txsize_entropy_ctx(tx_size);
838   const PLANE_TYPE plane_type = get_plane_type(plane);
839   const LV_MAP_COEFF_COST *const coeff_costs =
840       &x->coeff_costs[txs_ctx][plane_type];
841   if (eob == 0) {
842     return coeff_costs->txb_skip_cost[txb_ctx->txb_skip_ctx][1];
843   }
844 
845   const MACROBLOCKD *const xd = &x->e_mbd;
846   const TX_CLASS tx_class = tx_type_to_class[tx_type];
847 
848 #define WAREHOUSE_EFFICIENTS_TXB_CASE(tx_class_literal)                        \
849   case tx_class_literal:                                                       \
850     return warehouse_efficients_txb(cm, x, plane, block, tx_size, txb_ctx, p,  \
851                                     eob, plane_type, coeff_costs, xd, tx_type, \
852                                     tx_class_literal);
853   switch (tx_class) {
854     WAREHOUSE_EFFICIENTS_TXB_CASE(TX_CLASS_2D);
855     WAREHOUSE_EFFICIENTS_TXB_CASE(TX_CLASS_HORIZ);
856     WAREHOUSE_EFFICIENTS_TXB_CASE(TX_CLASS_VERT);
857 #undef WAREHOUSE_EFFICIENTS_TXB_CASE
858     default: assert(false); return 0;
859   }
860 }
861 
// Optimizes the quantized coefficients described by txb_info in place,
// walking the scan order from the last significant coefficient back to
// position 0. For each non-zero coefficient it compares the rd cost of the
// current level against the "low" alternative reported by the
// get_dist_cost_stats*() helpers, and it also considers moving the eob earlier
// or zeroing the whole block.
//
// txb_info      - block state; qcoeff/levels (through update_coeff()) and eob
//                 may be modified.
// txb_costs     - coefficient cost tables.
// txb_eob_costs - eob cost tables.
// rate_cost     - output: rate of the chosen result. It is NOT written when
//                 the block already has eob == 0 on entry.
//
// Returns non-zero when any coefficient or the eob was changed.
static int optimize_txb(TxbInfo *txb_info, const LV_MAP_COEFF_COST *txb_costs,
                        const LV_MAP_EOB_COST *txb_eob_costs, int *rate_cost) {
  int update = 0;
  if (txb_info->eob == 0) return update;
  const int16_t *const scan = txb_info->scan_order->scan;
  // Start from the cost of signalling the initial eob.
  const int init_eob = txb_info->eob;
  const TX_CLASS tx_class = tx_type_to_class[txb_info->tx_type];
  const int eob_cost =
      get_eob_cost(init_eob, txb_eob_costs, txb_costs, tx_class);

  // backward optimize the level-k map
  int accu_rate = eob_cost;
  int64_t accu_dist = 0;
  int64_t prev_eob_rd_cost = INT64_MAX;
  int64_t cur_eob_rd_cost = 0;

  // Coefficient at the initial eob position: it may be lowered, but never to
  // zero (stats.low_qc != 0), so the eob itself does not move here.
  {
    const int si = init_eob - 1;
    const int coeff_idx = scan[si];
    LevelDownStats stats;
    get_dist_cost_stats(&stats, si, si == init_eob - 1, txb_costs, txb_info,
                        tx_class);
    if ((stats.rd_low < stats.rd) && (stats.low_qc != 0)) {
      update = 1;
      update_coeff(coeff_idx, stats.low_qc, txb_info);
      accu_rate += stats.rate_low;
      accu_dist += stats.dist_low;
    } else {
      accu_rate += stats.rate;
      accu_dist += stats.dist;
    }
  }

  int si = init_eob - 2;
  // Counts non-zero coefficients kept unchanged since the last eob move; the
  // first loop stops once two have been seen.
  int8_t has_nz_tail = 0;
  // eob is not fixed
  for (; si >= 0 && has_nz_tail < 2; --si) {
    assert(si != init_eob - 1);
    const int coeff_idx = scan[si];
    tran_low_t qc = txb_info->qcoeff[coeff_idx];

    if (qc == 0) {
      const int coeff_ctx =
          get_lower_levels_ctx(txb_info->levels, coeff_idx, txb_info->bwl,
                               txb_info->tx_size, tx_class);
      accu_rate += txb_costs->base_cost[coeff_ctx][0];
    } else {
      LevelDownStats stats;
      get_dist_cost_stats_with_eob(&stats, si, txb_costs, txb_info, tx_class);
      // check if it is better to make this the last significant coefficient
      int cur_eob_rate =
          get_eob_cost(si + 1, txb_eob_costs, txb_costs, tx_class);
      cur_eob_rd_cost = RDCOST(txb_info->rdmult, cur_eob_rate, 0);
      prev_eob_rd_cost =
          RDCOST(txb_info->rdmult, accu_rate, accu_dist) + stats.nz_rd;
      if (cur_eob_rd_cost <= prev_eob_rd_cost) {
        update = 1;
        // Zero everything after the new last position.
        for (int j = si + 1; j < txb_info->eob; j++) {
          const int coeff_pos_j = scan[j];
          update_coeff(coeff_pos_j, 0, txb_info);
        }
        txb_info->eob = si + 1;

        // rerun cost calculation due to change of eob
        accu_rate = cur_eob_rate;
        accu_dist = 0;
        get_dist_cost_stats(&stats, si, 1, txb_costs, txb_info, tx_class);
        if ((stats.rd_low < stats.rd) && (stats.low_qc != 0)) {
          update = 1;
          update_coeff(coeff_idx, stats.low_qc, txb_info);
          accu_rate += stats.rate_low;
          accu_dist += stats.dist_low;
        } else {
          accu_rate += stats.rate;
          accu_dist += stats.dist;
        }

        // reset non zero tail when new eob is found
        has_nz_tail = 0;
      } else {
        int bUpdCoeff = 0;
        if (stats.rd_low < stats.rd) {
          // Only coefficients before the current last position are lowered
          // on this path.
          if ((si < txb_info->eob - 1)) {
            bUpdCoeff = 1;
            update = 1;
          }
        } else {
          ++has_nz_tail;
        }

        if (bUpdCoeff) {
          update_coeff(coeff_idx, stats.low_qc, txb_info);
          accu_rate += stats.rate_low;
          accu_dist += stats.dist_low;
        } else {
          accu_rate += stats.rate;
          accu_dist += stats.dist;
        }
      }
    }
  }  // for (si)

  // eob is fixed
  // Remaining positions: the eob is no longer reconsidered, only individual
  // levels are lowered.
  for (; si >= 0; --si) {
    assert(si != init_eob - 1);
    const int coeff_idx = scan[si];
    tran_low_t qc = txb_info->qcoeff[coeff_idx];

    if (qc == 0) {
      const int coeff_ctx =
          get_lower_levels_ctx(txb_info->levels, coeff_idx, txb_info->bwl,
                               txb_info->tx_size, tx_class);
      accu_rate += txb_costs->base_cost[coeff_ctx][0];
    } else {
      LevelDownStats stats;
      get_dist_cost_stats(&stats, si, 0, txb_costs, txb_info, tx_class);

      int bUpdCoeff = 0;
      if (stats.rd_low < stats.rd) {
        if ((si < txb_info->eob - 1)) {
          bUpdCoeff = 1;
          update = 1;
        }
      }
      if (bUpdCoeff) {
        update_coeff(coeff_idx, stats.low_qc, txb_info);
        accu_rate += stats.rate_low;
        accu_dist += stats.dist_low;
      } else {
        accu_rate += stats.rate;
        accu_dist += stats.dist;
      }
    }
  }  // for (si)

  // Final decision: keep the optimized block (paying the "not skipped" flag)
  // or drop every coefficient and pay only the skip flag.
  int non_zero_blk_rate =
      txb_costs->txb_skip_cost[txb_info->txb_ctx->txb_skip_ctx][0];
  prev_eob_rd_cost =
      RDCOST(txb_info->rdmult, accu_rate + non_zero_blk_rate, accu_dist);

  int zero_blk_rate =
      txb_costs->txb_skip_cost[txb_info->txb_ctx->txb_skip_ctx][1];
  int64_t zero_blk_rd_cost = RDCOST(txb_info->rdmult, zero_blk_rate, 0);
  if (zero_blk_rd_cost <= prev_eob_rd_cost) {
    update = 1;
    for (int j = 0; j < txb_info->eob; j++) {
      const int coeff_pos_j = scan[j];
      update_coeff(coeff_pos_j, 0, txb_info);
    }
    txb_info->eob = 0;
  }

  // record total rate cost
  *rate_cost = zero_blk_rd_cost <= prev_eob_rd_cost
                   ? zero_blk_rate
                   : accu_rate + non_zero_blk_rate;

  // The transform type cost is added only when coefficients remain.
  if (txb_info->eob > 0) {
    *rate_cost += txb_info->tx_type_cost;
  }

  return update;
}
1026 
1027 static void hbt_init() {
1028   hbt_hash_table =
1029       aom_malloc(sizeof(OptTxbQcoeff) * HBT_TABLE_SIZE * HBT_ARRAY_LENGTH);
1030   memset(hbt_hash_table, 0,
1031          sizeof(OptTxbQcoeff) * HBT_TABLE_SIZE * HBT_ARRAY_LENGTH);
1032   av1_crc32c_calculator_init(&crc_calculator);  // 31 bit: qc & ctx
1033 
1034   hbt_needs_init = 0;
1035 }
1036 
1037 void hbt_destroy() { aom_free(hbt_hash_table); }
1038 
1039 static int hbt_hash_miss(uint32_t hbt_ctx_hash, uint32_t hbt_qc_hash,
1040                          TxbInfo *txb_info, const LV_MAP_COEFF_COST *txb_costs,
1041                          const LV_MAP_EOB_COST *txb_eob_costs,
1042                          const struct macroblock_plane *p, int block,
1043                          int fast_mode, int *rate_cost) {
1044   (void)fast_mode;
1045   const int16_t *scan = txb_info->scan_order->scan;
1046   int prev_eob = txb_info->eob;
1047   assert(HBT_EOB <= 16);  // Lengthen array if allowing longer eob.
1048   int32_t prev_coeff[16];
1049   for (int i = 0; i < prev_eob; i++) {
1050     prev_coeff[i] = txb_info->qcoeff[scan[i]];
1051   }
1052   for (int i = prev_eob; i < HBT_EOB; i++) {
1053     prev_coeff[i] = 0;  // For compiler piece of mind.
1054   }
1055 
1056   av1_txb_init_levels(txb_info->qcoeff, txb_info->width, txb_info->height,
1057                       txb_info->levels);
1058 
1059   const int update =
1060       optimize_txb(txb_info, txb_costs, txb_eob_costs, rate_cost);
1061 
1062   // Overwrite old entry
1063   uint16_t hbt_table_index = hbt_ctx_hash % HBT_TABLE_SIZE;
1064   uint16_t hbt_array_index = hbt_qc_hash % HBT_ARRAY_LENGTH;
1065   hbt_hash_table[hbt_table_index * HBT_ARRAY_LENGTH + hbt_array_index]
1066       .rate_cost = *rate_cost;
1067   hbt_hash_table[hbt_table_index * HBT_ARRAY_LENGTH + hbt_array_index].init = 1;
1068   hbt_hash_table[hbt_table_index * HBT_ARRAY_LENGTH + hbt_array_index]
1069       .hbt_qc_hash = hbt_qc_hash;
1070   hbt_hash_table[hbt_table_index * HBT_ARRAY_LENGTH + hbt_array_index]
1071       .hbt_ctx_hash = hbt_ctx_hash;
1072   assert(prev_eob >= txb_info->eob);  // eob can't get longer
1073   for (int i = 0; i < txb_info->eob; i++) {
1074     // Record how coeff changed. Convention: towards zero is negative.
1075     if (txb_info->qcoeff[scan[i]] > 0)
1076       hbt_hash_table[hbt_table_index * HBT_ARRAY_LENGTH + hbt_array_index]
1077           .deltas[i] = txb_info->qcoeff[scan[i]] - prev_coeff[i];
1078     else
1079       hbt_hash_table[hbt_table_index * HBT_ARRAY_LENGTH + hbt_array_index]
1080           .deltas[i] = prev_coeff[i] - txb_info->qcoeff[scan[i]];
1081   }
1082   for (int i = txb_info->eob; i < prev_eob; i++) {
1083     // If eob got shorter, record that all after it changed to zero.
1084     if (prev_coeff[i] > 0)
1085       hbt_hash_table[hbt_table_index * HBT_ARRAY_LENGTH + hbt_array_index]
1086           .deltas[i] = -prev_coeff[i];
1087     else
1088       hbt_hash_table[hbt_table_index * HBT_ARRAY_LENGTH + hbt_array_index]
1089           .deltas[i] = prev_coeff[i];
1090   }
1091   for (int i = prev_eob; i < HBT_EOB; i++) {
1092     // Record 'no change' after optimized coefficients run out.
1093     hbt_hash_table[hbt_table_index * HBT_ARRAY_LENGTH + hbt_array_index]
1094         .deltas[i] = 0;
1095   }
1096 
1097   if (update) {
1098     p->eobs[block] = txb_info->eob;
1099     p->txb_entropy_ctx[block] = av1_get_txb_entropy_context(
1100         txb_info->qcoeff, txb_info->scan_order, txb_info->eob);
1101   }
1102   return txb_info->eob;
1103 }
1104 
1105 static int hbt_hash_hit(uint32_t hbt_table_index, int hbt_array_index,
1106                         TxbInfo *txb_info, const struct macroblock_plane *p,
1107                         int block, int *rate_cost) {
1108   const int16_t *scan = txb_info->scan_order->scan;
1109   int new_eob = 0;
1110   int update = 0;
1111 
1112   for (int i = 0; i < txb_info->eob; i++) {
1113     // Delta convention is negatives go towards zero, so only apply those ones.
1114     if (hbt_hash_table[hbt_table_index * HBT_ARRAY_LENGTH + hbt_array_index]
1115             .deltas[i] < 0) {
1116       if (txb_info->qcoeff[scan[i]] > 0)
1117         txb_info->qcoeff[scan[i]] +=
1118             hbt_hash_table[hbt_table_index * HBT_ARRAY_LENGTH + hbt_array_index]
1119                 .deltas[i];
1120       else
1121         txb_info->qcoeff[scan[i]] -=
1122             hbt_hash_table[hbt_table_index * HBT_ARRAY_LENGTH + hbt_array_index]
1123                 .deltas[i];
1124 
1125       update = 1;
1126       update_coeff(scan[i], txb_info->qcoeff[scan[i]], txb_info);
1127     }
1128     if (txb_info->qcoeff[scan[i]]) new_eob = i + 1;
1129   }
1130 
1131   // Rate_cost can be calculated here instead (av1_cost_coeffs_txb), but
1132   // it is expensive and gives little benefit as long as qc_hash is high bit
1133   *rate_cost =
1134       hbt_hash_table[hbt_table_index * HBT_ARRAY_LENGTH + hbt_array_index]
1135           .rate_cost;
1136 
1137   if (update) {
1138     txb_info->eob = new_eob;
1139     p->eobs[block] = txb_info->eob;
1140     p->txb_entropy_ctx[block] = av1_get_txb_entropy_context(
1141         txb_info->qcoeff, txb_info->scan_order, txb_info->eob);
1142   }
1143 
1144   return txb_info->eob;
1145 }
1146 
1147 static int hbt_search_match(uint32_t hbt_ctx_hash, uint32_t hbt_qc_hash,
1148                             TxbInfo *txb_info,
1149                             const LV_MAP_COEFF_COST *txb_costs,
1150                             const LV_MAP_EOB_COST *txb_eob_costs,
1151                             const struct macroblock_plane *p, int block,
1152                             int fast_mode, int *rate_cost) {
1153   // Check for qcoeff match
1154   int hbt_array_index = hbt_qc_hash % HBT_ARRAY_LENGTH;
1155   int hbt_table_index = hbt_ctx_hash % HBT_TABLE_SIZE;
1156 
1157   if (hbt_hash_table[hbt_table_index * HBT_ARRAY_LENGTH + hbt_array_index]
1158               .hbt_qc_hash == hbt_qc_hash &&
1159       hbt_hash_table[hbt_table_index * HBT_ARRAY_LENGTH + hbt_array_index]
1160               .hbt_ctx_hash == hbt_ctx_hash &&
1161       hbt_hash_table[hbt_table_index * HBT_ARRAY_LENGTH + hbt_array_index]
1162           .init) {
1163     return hbt_hash_hit(hbt_table_index, hbt_array_index, txb_info, p, block,
1164                         rate_cost);
1165   } else {
1166     return hbt_hash_miss(hbt_ctx_hash, hbt_qc_hash, txb_info, txb_costs,
1167                          txb_eob_costs, p, block, fast_mode, rate_cost);
1168   }
1169 }
1170 
1171 static int hbt_create_hashes(TxbInfo *txb_info,
1172                              const LV_MAP_COEFF_COST *txb_costs,
1173                              const LV_MAP_EOB_COST *txb_eob_costs,
1174                              const struct macroblock_plane *p, int block,
1175                              int fast_mode, int *rate_cost) {
1176   // Initialize hash table if needed.
1177   if (hbt_needs_init) {
1178     hbt_init();
1179   }
1180 
1181   //// Hash creation
1182   uint8_t txb_hash_data[256];  // Asserts below to ensure enough space.
1183   const int16_t *scan = txb_info->scan_order->scan;
1184   uint8_t chunk = 0;
1185   int hash_data_index = 0;
1186 
1187   // Make qc_hash.
1188   int packing_index = 0;  // needed for packing.
1189   for (int i = 0; i < txb_info->eob; i++) {
1190     tran_low_t prechunk = txb_info->qcoeff[scan[i]];
1191 
1192     // Softening: Improves speed. Aligns with signed deltas.
1193     if (prechunk < 0) prechunk *= -1;
1194 
1195     // Early kick out: Don't apply feature if there are large coeffs:
1196     // If this kickout value is removed or raised beyond int8_t,
1197     // widen deltas type in OptTxbQcoeff struct.
1198     assert((int8_t)HBT_KICKOUT == HBT_KICKOUT);  // If not, widen types.
1199     if (prechunk > HBT_KICKOUT) {
1200       av1_txb_init_levels(txb_info->qcoeff, txb_info->width, txb_info->height,
1201                           txb_info->levels);
1202 
1203       const int update =
1204           optimize_txb(txb_info, txb_costs, txb_eob_costs, rate_cost);
1205 
1206       if (update) {
1207         p->eobs[block] = txb_info->eob;
1208         p->txb_entropy_ctx[block] = av1_get_txb_entropy_context(
1209             txb_info->qcoeff, txb_info->scan_order, txb_info->eob);
1210       }
1211       return txb_info->eob;
1212     }
1213 
1214     // Since coeffs are 0 to 3, only 2 bits are needed: pack into bytes
1215     if (packing_index == 0) txb_hash_data[hash_data_index] = 0;
1216     chunk = prechunk << packing_index;
1217     packing_index += 2;
1218     txb_hash_data[hash_data_index] |= chunk;
1219 
1220     // Full byte:
1221     if (packing_index == 8) {
1222       packing_index = 0;
1223       hash_data_index++;
1224     }
1225   }
1226   // Needed when packing_index != 0, to include final byte.
1227   hash_data_index++;
1228   assert(hash_data_index <= 64);
1229   // 31 bit qc_hash: index to array
1230   uint32_t hbt_qc_hash =
1231       av1_get_crc32c_value(&crc_calculator, txb_hash_data, hash_data_index);
1232 
1233   // Make ctx_hash.
1234   hash_data_index = 0;
1235   tran_low_t prechunk;
1236 
1237   for (int i = 0; i < txb_info->eob; i++) {
1238     // Save as magnitudes towards or away from zero.
1239     if (txb_info->tcoeff[scan[i]] >= 0)
1240       prechunk = txb_info->tcoeff[scan[i]] - txb_info->dqcoeff[scan[i]];
1241     else
1242       prechunk = txb_info->dqcoeff[scan[i]] - txb_info->tcoeff[scan[i]];
1243 
1244     chunk = prechunk & 0xff;
1245     txb_hash_data[hash_data_index++] = chunk;
1246   }
1247 
1248   // Extra ctx data:
1249   // Include dequants.
1250   txb_hash_data[hash_data_index++] = txb_info->dequant[0] & 0xff;
1251   txb_hash_data[hash_data_index++] = txb_info->dequant[1] & 0xff;
1252   chunk = txb_info->txb_ctx->txb_skip_ctx & 0xff;
1253   txb_hash_data[hash_data_index++] = chunk;
1254   chunk = txb_info->txb_ctx->dc_sign_ctx & 0xff;
1255   txb_hash_data[hash_data_index++] = chunk;
1256   // eob
1257   chunk = txb_info->eob & 0xff;
1258   txb_hash_data[hash_data_index++] = chunk;
1259   // rdmult (int64)
1260   chunk = txb_info->rdmult & 0xff;
1261   txb_hash_data[hash_data_index++] = chunk;
1262   // tx_type
1263   chunk = txb_info->tx_type & 0xff;
1264   txb_hash_data[hash_data_index++] = chunk;
1265   // base_eob_cost
1266   for (int i = 1; i < 3; i++) {  // i = 0 are softened away
1267     for (int j = 0; j < SIG_COEF_CONTEXTS_EOB; j++) {
1268       chunk = (txb_costs->base_eob_cost[j][i] & 0xff00) >> 8;
1269       txb_hash_data[hash_data_index++] = chunk;
1270     }
1271   }
1272   // eob_cost
1273   for (int i = 0; i < 11; i++) {
1274     for (int j = 0; j < 2; j++) {
1275       chunk = (txb_eob_costs->eob_cost[j][i] & 0xff00) >> 8;
1276       txb_hash_data[hash_data_index++] = chunk;
1277     }
1278   }
1279   // dc_sign_cost
1280   for (int i = 0; i < 2; i++) {
1281     for (int j = 0; j < DC_SIGN_CONTEXTS; j++) {
1282       chunk = (txb_costs->dc_sign_cost[j][i] & 0xff00) >> 8;
1283       txb_hash_data[hash_data_index++] = chunk;
1284     }
1285   }
1286 
1287   assert(hash_data_index <= 256);
1288   // 31 bit ctx_hash: used to index table
1289   uint32_t hbt_ctx_hash =
1290       av1_get_crc32c_value(&crc_calculator, txb_hash_data, hash_data_index);
1291   //// End hash creation
1292 
1293   return hbt_search_match(hbt_ctx_hash, hbt_qc_hash, txb_info, txb_costs,
1294                           txb_eob_costs, p, block, fast_mode, rate_cost);
1295 }
1296 
1297 static AOM_FORCE_INLINE int get_two_coeff_cost_simple(
1298     int ci, tran_low_t abs_qc, int coeff_ctx,
1299     const LV_MAP_COEFF_COST *txb_costs, int bwl, TX_CLASS tx_class,
1300     const uint8_t *levels, int *cost_low) {
1301   // this simple version assumes the coeff's scan_idx is not DC (scan_idx != 0)
1302   // and not the last (scan_idx != eob - 1)
1303   assert(ci > 0);
1304   int cost = txb_costs->base_cost[coeff_ctx][AOMMIN(abs_qc, 3)];
1305   int diff = 0;
1306   if (abs_qc <= 3) diff = txb_costs->base_cost[coeff_ctx][abs_qc + 4];
1307   if (abs_qc) {
1308     cost += av1_cost_literal(1);
1309     if (abs_qc > NUM_BASE_LEVELS) {
1310       const int br_ctx = get_br_ctx(levels, ci, bwl, tx_class);
1311       int brcost_diff = 0;
1312       cost += get_br_cost_with_diff(abs_qc, txb_costs->lps_cost[br_ctx],
1313                                     &brcost_diff);
1314       diff += brcost_diff;
1315     }
1316   }
1317   *cost_low = cost - diff;
1318 
1319   return cost;
1320 }
1321 
1322 static INLINE int get_coeff_cost_eob(int ci, tran_low_t abs_qc, int sign,
1323                                      int coeff_ctx, int dc_sign_ctx,
1324                                      const LV_MAP_COEFF_COST *txb_costs,
1325                                      int bwl, TX_CLASS tx_class) {
1326   int cost = 0;
1327   cost += txb_costs->base_eob_cost[coeff_ctx][AOMMIN(abs_qc, 3) - 1];
1328   if (abs_qc != 0) {
1329     if (ci == 0) {
1330       cost += txb_costs->dc_sign_cost[dc_sign_ctx][sign];
1331     } else {
1332       cost += av1_cost_literal(1);
1333     }
1334     if (abs_qc > NUM_BASE_LEVELS) {
1335       int br_ctx;
1336       br_ctx = get_br_ctx_eob(ci, bwl, tx_class);
1337       cost += get_br_cost(abs_qc, txb_costs->lps_cost[br_ctx]);
1338     }
1339   }
1340   return cost;
1341 }
1342 
1343 static INLINE int get_coeff_cost_general(int is_last, int ci, tran_low_t abs_qc,
1344                                          int sign, int coeff_ctx,
1345                                          int dc_sign_ctx,
1346                                          const LV_MAP_COEFF_COST *txb_costs,
1347                                          int bwl, TX_CLASS tx_class,
1348                                          const uint8_t *levels) {
1349   int cost = 0;
1350   if (is_last) {
1351     cost += txb_costs->base_eob_cost[coeff_ctx][AOMMIN(abs_qc, 3) - 1];
1352   } else {
1353     cost += txb_costs->base_cost[coeff_ctx][AOMMIN(abs_qc, 3)];
1354   }
1355   if (abs_qc != 0) {
1356     if (ci == 0) {
1357       cost += txb_costs->dc_sign_cost[dc_sign_ctx][sign];
1358     } else {
1359       cost += av1_cost_literal(1);
1360     }
1361     if (abs_qc > NUM_BASE_LEVELS) {
1362       int br_ctx;
1363       if (is_last)
1364         br_ctx = get_br_ctx_eob(ci, bwl, tx_class);
1365       else
1366         br_ctx = get_br_ctx(levels, ci, bwl, tx_class);
1367       cost += get_br_cost(abs_qc, txb_costs->lps_cost[br_ctx]);
1368     }
1369   }
1370   return cost;
1371 }
1372 
1373 static INLINE void get_qc_dqc_low(tran_low_t abs_qc, int sign, int dqv,
1374                                   int shift, tran_low_t *qc_low,
1375                                   tran_low_t *dqc_low) {
1376   tran_low_t abs_qc_low = abs_qc - 1;
1377   *qc_low = (-sign ^ abs_qc_low) + sign;
1378   assert((sign ? -abs_qc_low : abs_qc_low) == *qc_low);
1379   tran_low_t abs_dqc_low = (abs_qc_low * dqv) >> shift;
1380   *dqc_low = (-sign ^ abs_dqc_low) + sign;
1381   assert((sign ? -abs_dqc_low : abs_dqc_low) == *dqc_low);
1382 }
1383 
1384 static INLINE void update_coeff_general(
1385     int *accu_rate, int64_t *accu_dist, int si, int eob, TX_SIZE tx_size,
1386     TX_CLASS tx_class, int bwl, int height, int64_t rdmult, int shift,
1387     int dc_sign_ctx, const int16_t *dequant, const int16_t *scan,
1388     const LV_MAP_COEFF_COST *txb_costs, const tran_low_t *tcoeff,
1389     tran_low_t *qcoeff, tran_low_t *dqcoeff, uint8_t *levels) {
1390   const int dqv = dequant[si != 0];
1391   const int ci = scan[si];
1392   const tran_low_t qc = qcoeff[ci];
1393   const int is_last = si == (eob - 1);
1394   const int coeff_ctx = get_lower_levels_ctx_general(
1395       is_last, si, bwl, height, levels, ci, tx_size, tx_class);
1396   if (qc == 0) {
1397     *accu_rate += txb_costs->base_cost[coeff_ctx][0];
1398   } else {
1399     const int sign = (qc < 0) ? 1 : 0;
1400     const tran_low_t abs_qc = abs(qc);
1401     const tran_low_t tqc = tcoeff[ci];
1402     const tran_low_t dqc = dqcoeff[ci];
1403     const int64_t dist = get_coeff_dist(tqc, dqc, shift);
1404     const int64_t dist0 = get_coeff_dist(tqc, 0, shift);
1405     const int rate =
1406         get_coeff_cost_general(is_last, ci, abs_qc, sign, coeff_ctx,
1407                                dc_sign_ctx, txb_costs, bwl, tx_class, levels);
1408     const int64_t rd = RDCOST(rdmult, rate, dist);
1409 
1410     tran_low_t qc_low, dqc_low;
1411     tran_low_t abs_qc_low;
1412     int64_t dist_low, rd_low;
1413     int rate_low;
1414     if (abs_qc == 1) {
1415       abs_qc_low = qc_low = dqc_low = 0;
1416       dist_low = dist0;
1417       rate_low = txb_costs->base_cost[coeff_ctx][0];
1418     } else {
1419       get_qc_dqc_low(abs_qc, sign, dqv, shift, &qc_low, &dqc_low);
1420       abs_qc_low = abs_qc - 1;
1421       dist_low = get_coeff_dist(tqc, dqc_low, shift);
1422       rate_low =
1423           get_coeff_cost_general(is_last, ci, abs_qc_low, sign, coeff_ctx,
1424                                  dc_sign_ctx, txb_costs, bwl, tx_class, levels);
1425     }
1426 
1427     rd_low = RDCOST(rdmult, rate_low, dist_low);
1428     if (rd_low < rd) {
1429       qcoeff[ci] = qc_low;
1430       dqcoeff[ci] = dqc_low;
1431       levels[get_padded_idx(ci, bwl)] = AOMMIN(abs_qc_low, INT8_MAX);
1432       *accu_rate += rate_low;
1433       *accu_dist += dist_low - dist0;
1434     } else {
1435       *accu_rate += rate;
1436       *accu_dist += dist - dist0;
1437     }
1438   }
1439 }
1440 
1441 static AOM_FORCE_INLINE void update_coeff_simple(
1442     int *accu_rate, int si, int eob, TX_SIZE tx_size, TX_CLASS tx_class,
1443     int bwl, int64_t rdmult, int shift, const int16_t *dequant,
1444     const int16_t *scan, const LV_MAP_COEFF_COST *txb_costs,
1445     const tran_low_t *tcoeff, tran_low_t *qcoeff, tran_low_t *dqcoeff,
1446     uint8_t *levels) {
1447   const int dqv = dequant[1];
1448   (void)eob;
1449   // this simple version assumes the coeff's scan_idx is not DC (scan_idx != 0)
1450   // and not the last (scan_idx != eob - 1)
1451   assert(si != eob - 1);
1452   assert(si > 0);
1453   const int ci = scan[si];
1454   const tran_low_t qc = qcoeff[ci];
1455   const int coeff_ctx =
1456       get_lower_levels_ctx(levels, ci, bwl, tx_size, tx_class);
1457   if (qc == 0) {
1458     *accu_rate += txb_costs->base_cost[coeff_ctx][0];
1459   } else {
1460     const tran_low_t abs_qc = abs(qc);
1461     const tran_low_t abs_tqc = abs(tcoeff[ci]);
1462     const tran_low_t abs_dqc = abs(dqcoeff[ci]);
1463     int rate_low = 0;
1464     const int rate = get_two_coeff_cost_simple(
1465         ci, abs_qc, coeff_ctx, txb_costs, bwl, tx_class, levels, &rate_low);
1466     if (abs_dqc < abs_tqc) {
1467       *accu_rate += rate;
1468       return;
1469     }
1470 
1471     const int64_t dist = get_coeff_dist(abs_tqc, abs_dqc, shift);
1472     const int64_t rd = RDCOST(rdmult, rate, dist);
1473 
1474     const tran_low_t abs_qc_low = abs_qc - 1;
1475     const tran_low_t abs_dqc_low = (abs_qc_low * dqv) >> shift;
1476     const int64_t dist_low = get_coeff_dist(abs_tqc, abs_dqc_low, shift);
1477     const int64_t rd_low = RDCOST(rdmult, rate_low, dist_low);
1478 
1479     if (rd_low < rd) {
1480       const int sign = (qc < 0) ? 1 : 0;
1481       qcoeff[ci] = (-sign ^ abs_qc_low) + sign;
1482       dqcoeff[ci] = (-sign ^ abs_dqc_low) + sign;
1483       levels[get_padded_idx(ci, bwl)] = AOMMIN(abs_qc_low, INT8_MAX);
1484       *accu_rate += rate_low;
1485     } else {
1486       *accu_rate += rate;
1487     }
1488   }
1489 }
1490 
1491 static INLINE void update_coeff_eob_fast(int *eob, int shift,
1492                                          const int16_t *dequant_ptr,
1493                                          const int16_t *scan,
1494                                          const tran_low_t *coeff_ptr,
1495                                          tran_low_t *qcoeff_ptr,
1496                                          tran_low_t *dqcoeff_ptr) {
1497   // TODO(sarahparker) make this work for aomqm
1498   int eob_out = *eob;
1499   int zbin[2] = { dequant_ptr[0] + ROUND_POWER_OF_TWO(dequant_ptr[0] * 70, 7),
1500                   dequant_ptr[1] + ROUND_POWER_OF_TWO(dequant_ptr[1] * 70, 7) };
1501 
1502   for (int i = *eob - 1; i >= 0; i--) {
1503     const int rc = scan[i];
1504     const int qcoeff = qcoeff_ptr[rc];
1505     const int coeff = coeff_ptr[rc];
1506     const int coeff_sign = (coeff >> 31);
1507     int64_t abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
1508 
1509     if (((abs_coeff << (1 + shift)) < zbin[rc != 0]) || (qcoeff == 0)) {
1510       eob_out--;
1511       qcoeff_ptr[rc] = 0;
1512       dqcoeff_ptr[rc] = 0;
1513     } else {
1514       break;
1515     }
1516   }
1517 
1518   *eob = eob_out;
1519 }
1520 
// Trellis step for the coefficient at scan position |si|, which must not be
// the current last coefficient (see the assert below).
//
// For a nonzero coefficient three options are compared by RD cost:
//   1. keep the current quantized level,
//   2. lower its magnitude by one,
//   3. (only when sharpness == 0) make this coefficient the new last
//      coefficient, i.e. set *eob = si + 1 and zero every coefficient
//      currently listed in nz_ci[0 .. *nz_num - 1].
// For a zero coefficient only the rate of coding a zero is accumulated.
//
// In/out state:
//   accu_rate, accu_dist - running rate / distortion totals; replaced (not
//                          incremented) when option 3 is chosen.
//   eob                  - updated when option 3 is chosen.
//   nz_num, nz_ci        - list of coefficient indices that are still nonzero;
//                          cleared on option 3, and this coefficient is
//                          appended if it remains nonzero.
//   qcoeff, dqcoeff, levels - updated in place for this coefficient and for
//                          any coefficient zeroed by option 3.
static AOM_FORCE_INLINE void update_coeff_eob(
    int *accu_rate, int64_t *accu_dist, int *eob, int *nz_num, int *nz_ci,
    int si, TX_SIZE tx_size, TX_CLASS tx_class, int bwl, int height,
    int dc_sign_ctx, int64_t rdmult, int shift, const int16_t *dequant,
    const int16_t *scan, const LV_MAP_EOB_COST *txb_eob_costs,
    const LV_MAP_COEFF_COST *txb_costs, const tran_low_t *tcoeff,
    tran_low_t *qcoeff, tran_low_t *dqcoeff, uint8_t *levels, int sharpness) {
  // dequant[0] is used for scan position 0, dequant[1] for every other one.
  const int dqv = dequant[si != 0];
  assert(si != *eob - 1);
  const int ci = scan[si];
  const tran_low_t qc = qcoeff[ci];
  const int coeff_ctx =
      get_lower_levels_ctx(levels, ci, bwl, tx_size, tx_class);
  if (qc == 0) {
    // Nothing to optimize: just pay for coding base level 0.
    *accu_rate += txb_costs->base_cost[coeff_ctx][0];
  } else {
    int lower_level = 0;
    const tran_low_t abs_qc = abs(qc);
    const tran_low_t tqc = tcoeff[ci];
    const tran_low_t dqc = dqcoeff[ci];
    const int sign = (qc < 0) ? 1 : 0;
    // All distortions below are expressed relative to dist0, the value
    // get_coeff_dist() yields for a dequantized coefficient of 0.
    const int64_t dist0 = get_coeff_dist(tqc, 0, shift);
    // Option 1: keep the current level.
    int64_t dist = get_coeff_dist(tqc, dqc, shift) - dist0;
    int rate =
        get_coeff_cost_general(0, ci, abs_qc, sign, coeff_ctx, dc_sign_ctx,
                               txb_costs, bwl, tx_class, levels);
    int64_t rd = RDCOST(rdmult, *accu_rate + rate, *accu_dist + dist);

    // Option 2: lower the magnitude by one.
    tran_low_t qc_low, dqc_low;
    tran_low_t abs_qc_low;
    int64_t dist_low, rd_low;
    int rate_low;
    if (abs_qc == 1) {
      // Lowering a magnitude of 1 gives zero: relative distortion is 0 and
      // the rate is that of coding base level 0.
      abs_qc_low = 0;
      dqc_low = qc_low = 0;
      dist_low = 0;
      rate_low = txb_costs->base_cost[coeff_ctx][0];
      rd_low = RDCOST(rdmult, *accu_rate + rate_low, *accu_dist);
    } else {
      get_qc_dqc_low(abs_qc, sign, dqv, shift, &qc_low, &dqc_low);
      abs_qc_low = abs_qc - 1;
      dist_low = get_coeff_dist(tqc, dqc_low, shift) - dist0;
      rate_low =
          get_coeff_cost_general(0, ci, abs_qc_low, sign, coeff_ctx,
                                 dc_sign_ctx, txb_costs, bwl, tx_class, levels);
      rd_low = RDCOST(rdmult, *accu_rate + rate_low, *accu_dist + dist_low);
    }

    // Option 3: make this coefficient the new last one. Its RD cost uses only
    // this coefficient's own rate/distortion (plus the cost of the new eob),
    // not the accumulated totals, since the accumulated state is replaced when
    // this option wins.
    int lower_level_new_eob = 0;
    const int new_eob = si + 1;
    const int coeff_ctx_new_eob = get_lower_levels_ctx_eob(bwl, height, si);
    const int new_eob_cost =
        get_eob_cost(new_eob, txb_eob_costs, txb_costs, tx_class);
    int rate_coeff_eob =
        new_eob_cost + get_coeff_cost_eob(ci, abs_qc, sign, coeff_ctx_new_eob,
                                          dc_sign_ctx, txb_costs, bwl,
                                          tx_class);
    int64_t dist_new_eob = dist;
    int64_t rd_new_eob = RDCOST(rdmult, rate_coeff_eob, dist_new_eob);

    // The lowered level is tried as the new last coefficient only if it is
    // still nonzero.
    if (abs_qc_low > 0) {
      const int rate_coeff_eob_low =
          new_eob_cost + get_coeff_cost_eob(ci, abs_qc_low, sign,
                                            coeff_ctx_new_eob, dc_sign_ctx,
                                            txb_costs, bwl, tx_class);
      const int64_t dist_new_eob_low = dist_low;
      const int64_t rd_new_eob_low =
          RDCOST(rdmult, rate_coeff_eob_low, dist_new_eob_low);
      if (rd_new_eob_low < rd_new_eob) {
        lower_level_new_eob = 1;
        rd_new_eob = rd_new_eob_low;
        rate_coeff_eob = rate_coeff_eob_low;
        dist_new_eob = dist_new_eob_low;
      }
    }

    // Pick the better of options 1 and 2.
    if (rd_low < rd) {
      lower_level = 1;
      rd = rd_low;
      rate = rate_low;
      dist = dist_low;
    }

    // Option 3 is only permitted when sharpness is 0.
    if (sharpness == 0 && rd_new_eob < rd) {
      // Zero every coefficient recorded as nonzero so far; they all become
      // positions past the new end of block.
      for (int ni = 0; ni < *nz_num; ++ni) {
        int last_ci = nz_ci[ni];
        levels[get_padded_idx(last_ci, bwl)] = 0;
        qcoeff[last_ci] = 0;
        dqcoeff[last_ci] = 0;
      }
      *eob = new_eob;
      *nz_num = 0;
      // Restart the accumulators from this coefficient.
      *accu_rate = rate_coeff_eob;
      *accu_dist = dist_new_eob;
      lower_level = lower_level_new_eob;
    } else {
      *accu_rate += rate;
      *accu_dist += dist;
    }

    if (lower_level) {
      qcoeff[ci] = qc_low;
      dqcoeff[ci] = dqc_low;
      // The stored level is capped at INT8_MAX.
      levels[get_padded_idx(ci, bwl)] = AOMMIN(abs_qc_low, INT8_MAX);
    }
    // Remember this coefficient if it is still nonzero after the decision.
    if (qcoeff[ci]) {
      nz_ci[*nz_num] = ci;
      ++*nz_num;
    }
  }
}
1632 
1633 static INLINE void update_skip(int *accu_rate, int64_t accu_dist, int *eob,
1634                                int nz_num, int *nz_ci, int64_t rdmult,
1635                                int skip_cost, int non_skip_cost,
1636                                tran_low_t *qcoeff, tran_low_t *dqcoeff,
1637                                int sharpness) {
1638   const int64_t rd = RDCOST(rdmult, *accu_rate + non_skip_cost, accu_dist);
1639   const int64_t rd_new_eob = RDCOST(rdmult, skip_cost, 0);
1640   if (sharpness == 0 && rd_new_eob < rd) {
1641     for (int i = 0; i < nz_num; ++i) {
1642       const int ci = nz_ci[i];
1643       qcoeff[ci] = 0;
1644       dqcoeff[ci] = 0;
1645       // no need to set up levels because this is the last step
1646       // levels[get_padded_idx(ci, bwl)] = 0;
1647     }
1648     *accu_rate = 0;
1649     *eob = 0;
1650   }
1651 }
1652 
// Rate-distortion optimizes the quantized coefficients of one transform block.
//
// The coefficients are visited from the last one in scan order towards DC:
//   1. the last coefficient is costed (and, if |level| >= 2, optimized with
//      update_coeff_general);
//   2. while few nonzero coefficients have been seen (nz_num <= max_nz_num)
//      and fast_mode is off, update_coeff_eob() is used, which may also move
//      the end of block;
//   3. if step 2 consumed every position, update_skip() may turn the whole
//      block into a skip block;
//   4. the remaining positions down to scan index 1 use update_coeff_simple(),
//      and the DC position (scan index 0) uses update_coeff_general().
//
// Parameters:
//   cpi, x       - encoder and macroblock state; qcoeff/dqcoeff of the block
//                  are modified in place.
//   plane, block - select the plane and the block within it.
//   tx_size, tx_type - transform size and type of the block.
//   txb_ctx      - supplies txb_skip_ctx and dc_sign_ctx.
//   rate_cost    - output: rate of the block after optimization.
//   sharpness    - added to the rdmult right shift below; a nonzero value also
//                  disables the eob-moving / skip decisions in
//                  update_coeff_eob() and update_skip().
//   fast_mode    - if nonzero, update_coeff_eob_fast() is run first and the
//                  update_coeff_eob() loop is bypassed.
//
// Side effects: writes p->eobs[block] and p->txb_entropy_ctx[block] (the
// latter is not written on the fast_mode early return).
// Returns the eob after optimization.
//
// NOTE(review): scan[eob - 1] is read unconditionally below, so when fast_mode
// is 0 this appears to require eob > 0 on entry - confirm against callers.
int av1_optimize_txb_new(const struct AV1_COMP *cpi, MACROBLOCK *x, int plane,
                         int block, TX_SIZE tx_size, TX_TYPE tx_type,
                         const TXB_CTX *const txb_ctx, int *rate_cost,
                         int sharpness, int fast_mode) {
  MACROBLOCKD *xd = &x->e_mbd;
  struct macroblockd_plane *pd = &xd->plane[plane];
  const struct macroblock_plane *p = &x->plane[plane];
  const SCAN_ORDER *scan_order = get_scan(tx_size, tx_type);
  const int16_t *scan = scan_order->scan;
  const int shift = av1_get_tx_scale(tx_size);
  int eob = p->eobs[block];
  const int16_t *dequant = p->dequant_QTX;
  tran_low_t *qcoeff = BLOCK_OFFSET(p->qcoeff, block);
  tran_low_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
  const tran_low_t *tcoeff = BLOCK_OFFSET(p->coeff, block);

  if (fast_mode) {
    // Cheap pre-pass that may shrink eob; if nothing is left the block costs
    // exactly the skip flag.
    update_coeff_eob_fast(&eob, shift, dequant, scan, tcoeff, qcoeff, dqcoeff);
    p->eobs[block] = eob;
    if (eob == 0) {
      *rate_cost = av1_cost_skip_txb(x, txb_ctx, plane, tx_size);
      return eob;
    }
  }

  const AV1_COMMON *cm = &cpi->common;
  const PLANE_TYPE plane_type = get_plane_type(plane);
  const TX_SIZE txs_ctx = get_txsize_entropy_ctx(tx_size);
  const TX_CLASS tx_class = tx_type_to_class[tx_type];
  const MB_MODE_INFO *mbmi = xd->mi[0];
  const int bwl = get_txb_bwl(tx_size);
  const int width = get_txb_wide(tx_size);
  const int height = get_txb_high(tx_size);
  assert(width == (1 << bwl));
  const int is_inter = is_inter_block(mbmi);
  const LV_MAP_COEFF_COST *txb_costs = &x->coeff_costs[txs_ctx][plane_type];
  const int eob_multi_size = txsize_log2_minus4[tx_size];
  const LV_MAP_EOB_COST *txb_eob_costs =
      &x->eob_costs[eob_multi_size][plane_type];

  // Right shift applied to the RD multiplier. It is the sum of:
  //   - sharpness,
  //   - 7 - segment_id when VARIANCE_AQ is on and segment_id < 4, else 2,
  //   - 3 - sb_energy_level when VARIANCE_AQ is off, delta-q is enabled and
  //     sb_energy_level is negative, else 0.
  const int rshift =
      (sharpness +
       (cpi->oxcf.aq_mode == VARIANCE_AQ && mbmi->segment_id < 4
            ? 7 - mbmi->segment_id
            : 2) +
       (cpi->oxcf.aq_mode != VARIANCE_AQ &&
                cpi->oxcf.deltaq_mode > NO_DELTA_Q && x->sb_energy_level < 0
            ? (3 - x->sb_energy_level)
            : 0));
  // Per-plane multiplier, scaled by 2 * (bit depth - 8) bits, then shifted
  // down by rshift.
  const int64_t rdmult =
      (((int64_t)x->rdmult *
        (plane_rd_mult[is_inter][plane_type] << (2 * (xd->bd - 8)))) +
       2) >>
      rshift;

  uint8_t levels_buf[TX_PAD_2D];
  uint8_t *const levels = set_levels(levels_buf, width);

  // NOTE(review): levels is left uninitialized when eob <= 1; presumably the
  // single-coefficient path never reads it - confirm.
  if (eob > 1) av1_txb_init_levels(qcoeff, width, height, levels);

  // TODO(angirbird): check iqmatrix

  const int non_skip_cost = txb_costs->txb_skip_cost[txb_ctx->txb_skip_ctx][0];
  const int skip_cost = txb_costs->txb_skip_cost[txb_ctx->txb_skip_ctx][1];
  const int eob_cost = get_eob_cost(eob, txb_eob_costs, txb_costs, tx_class);
  int accu_rate = eob_cost;
  int64_t accu_dist = 0;
  // Start at the last coefficient in scan order.
  int si = eob - 1;
  const int ci = scan[si];
  const tran_low_t qc = qcoeff[ci];
  const tran_low_t abs_qc = abs(qc);
  const int sign = qc < 0;
  // nz_ci lists the nonzero coefficients seen so far, seeded with the last
  // coefficient. The update_coeff_eob() loop below stops once more than
  // max_nz_num of them have been collected.
  const int max_nz_num = 2;
  int nz_num = 1;
  int nz_ci[3] = { ci, 0, 0 };
  if (abs_qc >= 2) {
    update_coeff_general(&accu_rate, &accu_dist, si, eob, tx_size, tx_class,
                         bwl, height, rdmult, shift, txb_ctx->dc_sign_ctx,
                         dequant, scan, txb_costs, tcoeff, qcoeff, dqcoeff,
                         levels);
    --si;
  } else {
    // The last coefficient has magnitude 1: it is kept as is and only its
    // rate and (zero-relative) distortion are accumulated.
    assert(abs_qc == 1);
    const int coeff_ctx = get_lower_levels_ctx_eob(bwl, height, si);
    accu_rate +=
        get_coeff_cost_eob(ci, abs_qc, sign, coeff_ctx, txb_ctx->dc_sign_ctx,
                           txb_costs, bwl, tx_class);
    const tran_low_t tqc = tcoeff[ci];
    const tran_low_t dqc = dqcoeff[ci];
    const int64_t dist = get_coeff_dist(tqc, dqc, shift);
    const int64_t dist0 = get_coeff_dist(tqc, 0, shift);
    accu_dist += dist - dist0;
    --si;
  }

  // One switch case per transform class so that update_coeff_eob() is called
  // with a literal tx_class. The loop is bypassed entirely in fast_mode.
#define UPDATE_COEFF_EOB_CASE(tx_class_literal)                            \
  case tx_class_literal:                                                   \
    for (; si >= 0 && nz_num <= max_nz_num && !fast_mode; --si) {          \
      update_coeff_eob(&accu_rate, &accu_dist, &eob, &nz_num, nz_ci, si,   \
                       tx_size, tx_class_literal, bwl, height,             \
                       txb_ctx->dc_sign_ctx, rdmult, shift, dequant, scan, \
                       txb_eob_costs, txb_costs, tcoeff, qcoeff, dqcoeff,  \
                       levels, sharpness);                                 \
    }                                                                      \
    break;
  switch (tx_class) {
    UPDATE_COEFF_EOB_CASE(TX_CLASS_2D);
    UPDATE_COEFF_EOB_CASE(TX_CLASS_HORIZ);
    UPDATE_COEFF_EOB_CASE(TX_CLASS_VERT);
#undef UPDATE_COEFF_EOB_CASE
    default: assert(false);
  }

  // Every position was handled above and few coefficients are nonzero:
  // consider coding the block as skip.
  if (si == -1 && nz_num <= max_nz_num) {
    update_skip(&accu_rate, accu_dist, &eob, nz_num, nz_ci, rdmult, skip_cost,
                non_skip_cost, qcoeff, dqcoeff, sharpness);
  }

  // Remaining non-DC positions (scan index >= 1) use the simple update, which
  // takes no distortion accumulator and no eob pointer.
#define UPDATE_COEFF_SIMPLE_CASE(tx_class_literal)                             \
  case tx_class_literal:                                                       \
    for (; si >= 1; --si) {                                                    \
      update_coeff_simple(&accu_rate, si, eob, tx_size, tx_class_literal, bwl, \
                          rdmult, shift, dequant, scan, txb_costs, tcoeff,     \
                          qcoeff, dqcoeff, levels);                            \
    }                                                                          \
    break;
  switch (tx_class) {
    UPDATE_COEFF_SIMPLE_CASE(TX_CLASS_2D);
    UPDATE_COEFF_SIMPLE_CASE(TX_CLASS_HORIZ);
    UPDATE_COEFF_SIMPLE_CASE(TX_CLASS_VERT);
#undef UPDATE_COEFF_SIMPLE_CASE
    default: assert(false);
  }

  // DC position
  if (si == 0) {
    // no need to update accu_dist because it's not used after this point
    int64_t dummy_dist = 0;
    update_coeff_general(&accu_rate, &dummy_dist, si, eob, tx_size, tx_class,
                         bwl, height, rdmult, shift, txb_ctx->dc_sign_ctx,
                         dequant, scan, txb_costs, tcoeff, qcoeff, dqcoeff,
                         levels);
  }

  // Add the block-level signalling: the skip flag alone for an empty block,
  // otherwise the non-skip flag plus the transform type cost.
  const int tx_type_cost = get_tx_type_cost(cm, x, xd, plane, tx_size, tx_type);
  if (eob == 0)
    accu_rate += skip_cost;
  else
    accu_rate += non_skip_cost + tx_type_cost;

  p->eobs[block] = eob;
  p->txb_entropy_ctx[block] =
      av1_get_txb_entropy_context(qcoeff, scan_order, p->eobs[block]);

  *rate_cost = accu_rate;
  return eob;
}
1810 
1811 // This function is deprecated, but we keep it here because hash trellis
1812 // is not integrated with av1_optimize_txb_new yet
1813 int av1_optimize_txb(const struct AV1_COMP *cpi, MACROBLOCK *x, int plane,
1814                      int blk_row, int blk_col, int block, TX_SIZE tx_size,
1815                      TXB_CTX *txb_ctx, int fast_mode, int *rate_cost) {
1816   const AV1_COMMON *cm = &cpi->common;
1817   MACROBLOCKD *const xd = &x->e_mbd;
1818   const PLANE_TYPE plane_type = get_plane_type(plane);
1819   const TX_SIZE txs_ctx = get_txsize_entropy_ctx(tx_size);
1820   const TX_TYPE tx_type = av1_get_tx_type(plane_type, xd, blk_row, blk_col,
1821                                           tx_size, cm->reduced_tx_set_used);
1822   const MB_MODE_INFO *mbmi = xd->mi[0];
1823   const struct macroblock_plane *p = &x->plane[plane];
1824   struct macroblockd_plane *pd = &xd->plane[plane];
1825   const int eob = p->eobs[block];
1826   tran_low_t *qcoeff = BLOCK_OFFSET(p->qcoeff, block);
1827   tran_low_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
1828   const tran_low_t *tcoeff = BLOCK_OFFSET(p->coeff, block);
1829   const int16_t *dequant = p->dequant_QTX;
1830   const int seg_eob = av1_get_max_eob(tx_size);
1831   const int bwl = get_txb_bwl(tx_size);
1832   const int width = get_txb_wide(tx_size);
1833   const int height = get_txb_high(tx_size);
1834   const int is_inter = is_inter_block(mbmi);
1835   const SCAN_ORDER *const scan_order = get_scan(tx_size, tx_type);
1836   const LV_MAP_COEFF_COST *txb_costs = &x->coeff_costs[txs_ctx][plane_type];
1837   const int eob_multi_size = txsize_log2_minus4[tx_size];
1838   const LV_MAP_EOB_COST txb_eob_costs =
1839       x->eob_costs[eob_multi_size][plane_type];
1840 
1841   const int shift = av1_get_tx_scale(tx_size);
1842   const int64_t rdmult =
1843       (((int64_t)x->rdmult * plane_rd_mult[is_inter][plane_type]
1844         << (2 * (xd->bd - 8))) +
1845        2) >>
1846       2;
1847   uint8_t levels_buf[TX_PAD_2D];
1848   uint8_t *const levels = set_levels(levels_buf, width);
1849   const TX_SIZE qm_tx_size = av1_get_adjusted_tx_size(tx_size);
1850   const qm_val_t *iqmatrix =
1851       IS_2D_TRANSFORM(tx_type)
1852           ? pd->seg_iqmatrix[mbmi->segment_id][qm_tx_size]
1853           : cm->giqmatrix[NUM_QM_LEVELS - 1][0][qm_tx_size];
1854   assert(width == (1 << bwl));
1855   const int tx_type_cost = get_tx_type_cost(cm, x, xd, plane, tx_size, tx_type);
1856   TxbInfo txb_info = {
1857     qcoeff,     levels,  dqcoeff, tcoeff,   dequant,      shift, tx_size,
1858     txs_ctx,    tx_type, bwl,     width,    height,       eob,   seg_eob,
1859     scan_order, txb_ctx, rdmult,  iqmatrix, tx_type_cost,
1860   };
1861 
1862   // Hash based trellis (hbt) speed feature: avoid expensive optimize_txb calls
1863   // by storing the coefficient deltas in a hash table.
1864   // Currently disabled in speedfeatures.c
1865   if (eob <= HBT_EOB && eob > 0 && cpi->sf.use_hash_based_trellis) {
1866     return hbt_create_hashes(&txb_info, txb_costs, &txb_eob_costs, p, block,
1867                              fast_mode, rate_cost);
1868   }
1869 
1870   av1_txb_init_levels(qcoeff, width, height, levels);
1871 
1872   const int update =
1873       optimize_txb(&txb_info, txb_costs, &txb_eob_costs, rate_cost);
1874 
1875   if (update) {
1876     p->eobs[block] = txb_info.eob;
1877     p->txb_entropy_ctx[block] =
1878         av1_get_txb_entropy_context(qcoeff, scan_order, txb_info.eob);
1879   }
1880   return txb_info.eob;
1881 }
1882 
1883 int av1_get_txb_entropy_context(const tran_low_t *qcoeff,
1884                                 const SCAN_ORDER *scan_order, int eob) {
1885   const int16_t *const scan = scan_order->scan;
1886   int cul_level = 0;
1887   int c;
1888 
1889   if (eob == 0) return 0;
1890   for (c = 0; c < eob; ++c) {
1891     cul_level += abs(qcoeff[scan[c]]);
1892     if (cul_level > COEFF_CONTEXT_MASK) break;
1893   }
1894 
1895   cul_level = AOMMIN(COEFF_CONTEXT_MASK, cul_level);
1896   set_dc_sign(&cul_level, qcoeff[0]);
1897 
1898   return cul_level;
1899 }
1900 
1901 void av1_update_txb_context_b(int plane, int block, int blk_row, int blk_col,
1902                               BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
1903                               void *arg) {
1904   struct tokenize_b_args *const args = arg;
1905   const AV1_COMP *cpi = args->cpi;
1906   const AV1_COMMON *cm = &cpi->common;
1907   ThreadData *const td = args->td;
1908   MACROBLOCK *const x = &td->mb;
1909   MACROBLOCKD *const xd = &x->e_mbd;
1910   struct macroblock_plane *p = &x->plane[plane];
1911   struct macroblockd_plane *pd = &xd->plane[plane];
1912   const uint16_t eob = p->eobs[block];
1913   const tran_low_t *qcoeff = BLOCK_OFFSET(p->qcoeff, block);
1914   const PLANE_TYPE plane_type = pd->plane_type;
1915   const TX_TYPE tx_type = av1_get_tx_type(plane_type, xd, blk_row, blk_col,
1916                                           tx_size, cm->reduced_tx_set_used);
1917   const SCAN_ORDER *const scan_order = get_scan(tx_size, tx_type);
1918   const int cul_level = av1_get_txb_entropy_context(qcoeff, scan_order, eob);
1919   av1_set_contexts(xd, pd, plane, plane_bsize, tx_size, cul_level, blk_col,
1920                    blk_row);
1921 }
1922 
1923 static void update_tx_type_count(const AV1_COMMON *cm, MACROBLOCKD *xd,
1924                                  int blk_row, int blk_col, int plane,
1925                                  TX_SIZE tx_size, FRAME_COUNTS *counts,
1926                                  uint8_t allow_update_cdf) {
1927   MB_MODE_INFO *mbmi = xd->mi[0];
1928   int is_inter = is_inter_block(mbmi);
1929   FRAME_CONTEXT *fc = xd->tile_ctx;
1930 #if !CONFIG_ENTROPY_STATS
1931   (void)counts;
1932 #endif  // !CONFIG_ENTROPY_STATS
1933 
1934   // Only y plane's tx_type is updated
1935   if (plane > 0) return;
1936   TX_TYPE tx_type = av1_get_tx_type(PLANE_TYPE_Y, xd, blk_row, blk_col, tx_size,
1937                                     cm->reduced_tx_set_used);
1938   if (get_ext_tx_types(tx_size, is_inter, cm->reduced_tx_set_used) > 1 &&
1939       cm->base_qindex > 0 && !mbmi->skip &&
1940       !segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) {
1941     const int eset = get_ext_tx_set(tx_size, is_inter, cm->reduced_tx_set_used);
1942     if (eset > 0) {
1943       const TxSetType tx_set_type =
1944           av1_get_ext_tx_set_type(tx_size, is_inter, cm->reduced_tx_set_used);
1945       if (is_inter) {
1946         if (allow_update_cdf) {
1947           update_cdf(fc->inter_ext_tx_cdf[eset][txsize_sqr_map[tx_size]],
1948                      av1_ext_tx_ind[tx_set_type][tx_type],
1949                      av1_num_ext_tx_set[tx_set_type]);
1950         }
1951 #if CONFIG_ENTROPY_STATS
1952         ++counts->inter_ext_tx[eset][txsize_sqr_map[tx_size]]
1953                               [av1_ext_tx_ind[tx_set_type][tx_type]];
1954 #endif  // CONFIG_ENTROPY_STATS
1955       } else {
1956         PREDICTION_MODE intra_dir;
1957         if (mbmi->filter_intra_mode_info.use_filter_intra)
1958           intra_dir = fimode_to_intradir[mbmi->filter_intra_mode_info
1959                                              .filter_intra_mode];
1960         else
1961           intra_dir = mbmi->mode;
1962 #if CONFIG_ENTROPY_STATS
1963         ++counts->intra_ext_tx[eset][txsize_sqr_map[tx_size]][intra_dir]
1964                               [av1_ext_tx_ind[tx_set_type][tx_type]];
1965 #endif  // CONFIG_ENTROPY_STATS
1966         if (allow_update_cdf) {
1967           update_cdf(
1968               fc->intra_ext_tx_cdf[eset][txsize_sqr_map[tx_size]][intra_dir],
1969               av1_ext_tx_ind[tx_set_type][tx_type],
1970               av1_num_ext_tx_set[tx_set_type]);
1971         }
1972       }
1973     }
1974   }
1975 }
1976 
// Per-transform-block callback (arg is a struct tokenize_b_args *) used when
// the block is actually being coded rather than dry-run.
//
// For the given block it:
//   - adapts the symbol CDFs in xd->tile_ctx when args->allow_update_cdf is
//     set, and bumps the statistics counters in CONFIG_ENTROPY_STATS builds
//     (txb skip flag, tx type, eob, base levels, base range, DC sign);
//   - records eob, txb_skip_ctx, dc_sign_ctx and a copy of the quantized
//     coefficients into x->mbmi_ext->cb_coef_buff;
//   - stores the block's entropy context through av1_set_contexts().
void av1_update_and_record_txb_context(int plane, int block, int blk_row,
                                       int blk_col, BLOCK_SIZE plane_bsize,
                                       TX_SIZE tx_size, void *arg) {
  struct tokenize_b_args *const args = arg;
  const AV1_COMP *cpi = args->cpi;
  const AV1_COMMON *cm = &cpi->common;
  ThreadData *const td = args->td;
  MACROBLOCK *const x = &td->mb;
  MACROBLOCKD *const xd = &x->e_mbd;
  struct macroblock_plane *p = &x->plane[plane];
  struct macroblockd_plane *pd = &xd->plane[plane];
  MB_MODE_INFO *mbmi = xd->mi[0];
  const int eob = p->eobs[block];
  TXB_CTX txb_ctx;
  get_txb_ctx(plane_bsize, tx_size, plane, pd->above_context + blk_col,
              pd->left_context + blk_row, &txb_ctx);
  const int bwl = get_txb_bwl(tx_size);
  const int width = get_txb_wide(tx_size);
  const int height = get_txb_high(tx_size);
  const uint8_t allow_update_cdf = args->allow_update_cdf;
  const TX_SIZE txsize_ctx = get_txsize_entropy_ctx(tx_size);
  FRAME_CONTEXT *ec_ctx = xd->tile_ctx;
#if CONFIG_ENTROPY_STATS
  int cdf_idx = cm->coef_cdf_category;
#endif  // CONFIG_ENTROPY_STATS

  // The skip flag symbol is (eob == 0).
#if CONFIG_ENTROPY_STATS
  ++td->counts->txb_skip[cdf_idx][txsize_ctx][txb_ctx.txb_skip_ctx][eob == 0];
#endif  // CONFIG_ENTROPY_STATS
  if (allow_update_cdf) {
    update_cdf(ec_ctx->txb_skip_cdf[txsize_ctx][txb_ctx.txb_skip_ctx], eob == 0,
               2);
  }

  // cb_offset is divided by the area of the minimum transform block to obtain
  // the index used for the per-block side buffers.
  const int txb_offset =
      x->mbmi_ext->cb_offset / (TX_SIZE_W_MIN * TX_SIZE_H_MIN);
  uint16_t *eob_txb = x->mbmi_ext->cb_coef_buff->eobs[plane] + txb_offset;
  uint8_t *txb_skip_ctx_txb =
      x->mbmi_ext->cb_coef_buff->txb_skip_ctx[plane] + txb_offset;
  txb_skip_ctx_txb[block] = txb_ctx.txb_skip_ctx;
  eob_txb[block] = eob;

  // Empty block: only the (zero) entropy context remains to be stored.
  if (eob == 0) {
    av1_set_contexts(xd, pd, plane, plane_bsize, tx_size, 0, blk_col, blk_row);
    return;
  }

  // Copy seg_eob quantized coefficients into the coefficient record buffer.
  tran_low_t *tcoeff_txb =
      x->mbmi_ext->cb_coef_buff->tcoeff[plane] + x->mbmi_ext->cb_offset;
  tran_low_t *tcoeff = BLOCK_OFFSET(tcoeff_txb, block);
  const int segment_id = mbmi->segment_id;
  const int seg_eob = av1_get_tx_eob(&cpi->common.seg, segment_id, tx_size);
  const tran_low_t *qcoeff = BLOCK_OFFSET(p->qcoeff, block);
  memcpy(tcoeff, qcoeff, sizeof(*tcoeff) * seg_eob);

  uint8_t levels_buf[TX_PAD_2D];
  uint8_t *const levels = set_levels(levels_buf, width);
  av1_txb_init_levels(tcoeff, width, height, levels);
  update_tx_type_count(cm, xd, blk_row, blk_col, plane, tx_size, td->counts,
                       allow_update_cdf);

  const PLANE_TYPE plane_type = pd->plane_type;
  const TX_TYPE tx_type = av1_get_tx_type(plane_type, xd, blk_row, blk_col,
                                          tx_size, cm->reduced_tx_set_used);
  const TX_CLASS tx_class = tx_type_to_class[tx_type];
  const SCAN_ORDER *const scan_order = get_scan(tx_size, tx_type);
  const int16_t *const scan = scan_order->scan;
  // av1_update_eob_context() takes extra arguments (cdf_idx, counts) in
  // CONFIG_ENTROPY_STATS builds.
#if CONFIG_ENTROPY_STATS
  av1_update_eob_context(cdf_idx, eob, tx_size, tx_class, plane_type, ec_ctx,
                         td->counts, allow_update_cdf);
#else
  av1_update_eob_context(eob, tx_size, tx_class, plane_type, ec_ctx,
                         allow_update_cdf);
#endif

  DECLARE_ALIGNED(16, int8_t, coeff_contexts[MAX_TX_SQUARE]);
  av1_get_nz_map_contexts(levels, scan, eob, tx_size, tx_class, coeff_contexts);

  // Walk the coefficients from the last one in scan order back to the first.
  for (int c = eob - 1; c >= 0; --c) {
    const int pos = scan[c];
    const int coeff_ctx = coeff_contexts[pos];
    const tran_low_t v = qcoeff[pos];
    const tran_low_t level = abs(v);

    if (allow_update_cdf) {
      if (c == eob - 1) {
        // The last coefficient uses a 3-symbol alphabet: min(level, 3) - 1.
        assert(coeff_ctx < 4);
        update_cdf(
            ec_ctx->coeff_base_eob_cdf[txsize_ctx][plane_type][coeff_ctx],
            AOMMIN(level, 3) - 1, 3);
      } else {
        // Every other coefficient uses a 4-symbol alphabet: min(level, 3).
        update_cdf(ec_ctx->coeff_base_cdf[txsize_ctx][plane_type][coeff_ctx],
                   AOMMIN(level, 3), 4);
      }
    }
    {
      // Note: the #if below spans the "} else {", so without
      // CONFIG_ENTROPY_STATS this block reduces to the assert alone.
      if (c == eob - 1) {
        assert(coeff_ctx < 4);
#if CONFIG_ENTROPY_STATS
        ++td->counts->coeff_base_eob_multi[cdf_idx][txsize_ctx][plane_type]
                                          [coeff_ctx][AOMMIN(level, 3) - 1];
      } else {
        ++td->counts->coeff_base_multi[cdf_idx][txsize_ctx][plane_type]
                                      [coeff_ctx][AOMMIN(level, 3)];
#endif
      }
    }
    if (level > NUM_BASE_LEVELS) {
      // The part of the level above the base levels is processed in chunks of
      // up to BR_CDF_SIZE - 1, stopping at the first chunk that is not full or
      // once COEFF_BASE_RANGE has been covered.
      const int base_range = level - 1 - NUM_BASE_LEVELS;
      const int br_ctx = get_br_ctx(levels, pos, bwl, tx_class);
      for (int idx = 0; idx < COEFF_BASE_RANGE; idx += BR_CDF_SIZE - 1) {
        const int k = AOMMIN(base_range - idx, BR_CDF_SIZE - 1);
        if (allow_update_cdf) {
          update_cdf(ec_ctx->coeff_br_cdf[AOMMIN(txsize_ctx, TX_32X32)]
                                         [plane_type][br_ctx],
                     k, BR_CDF_SIZE);
        }
        // Without CONFIG_ENTROPY_STATS this loop has no effect besides
        // iterating until lps == k.
        for (int lps = 0; lps < BR_CDF_SIZE - 1; lps++) {
#if CONFIG_ENTROPY_STATS
          ++td->counts->coeff_lps[AOMMIN(txsize_ctx, TX_32X32)][plane_type][lps]
                                 [br_ctx][lps == k];
#endif  // CONFIG_ENTROPY_STATS
          if (lps == k) break;
        }
#if CONFIG_ENTROPY_STATS
        ++td->counts->coeff_lps_multi[cdf_idx][AOMMIN(txsize_ctx, TX_32X32)]
                                     [plane_type][br_ctx][k];
#endif
        if (k < BR_CDF_SIZE - 1) break;
      }
    }
  }

  // Update the context needed to code the DC sign (if applicable)
  if (tcoeff[0] != 0) {
    const int dc_sign = (tcoeff[0] < 0) ? 1 : 0;
    const int dc_sign_ctx = txb_ctx.dc_sign_ctx;
#if CONFIG_ENTROPY_STATS
    ++td->counts->dc_sign[plane_type][dc_sign_ctx][dc_sign];
#endif  // CONFIG_ENTROPY_STATS
    if (allow_update_cdf)
      update_cdf(ec_ctx->dc_sign_cdf[plane_type][dc_sign_ctx], dc_sign, 2);
    // The DC sign context is recorded only when the DC coefficient is nonzero.
    int *dc_sign_ctx_txb =
        x->mbmi_ext->cb_coef_buff->dc_sign_ctx[plane] + txb_offset;
    dc_sign_ctx_txb[block] = dc_sign_ctx;
  }

  const int cul_level = av1_get_txb_entropy_context(tcoeff, scan_order, eob);
  av1_set_contexts(xd, pd, plane, plane_bsize, tx_size, cul_level, blk_col,
                   blk_row);
}
2128 
2129 void av1_update_txb_context(const AV1_COMP *cpi, ThreadData *td,
2130                             RUN_TYPE dry_run, BLOCK_SIZE bsize, int *rate,
2131                             int mi_row, int mi_col, uint8_t allow_update_cdf) {
2132   const AV1_COMMON *const cm = &cpi->common;
2133   const int num_planes = av1_num_planes(cm);
2134   MACROBLOCK *const x = &td->mb;
2135   MACROBLOCKD *const xd = &x->e_mbd;
2136   MB_MODE_INFO *const mbmi = xd->mi[0];
2137   struct tokenize_b_args arg = { cpi, td, NULL, 0, allow_update_cdf };
2138   (void)rate;
2139   (void)mi_row;
2140   (void)mi_col;
2141   if (mbmi->skip) {
2142     av1_reset_skip_context(xd, mi_row, mi_col, bsize, num_planes);
2143     return;
2144   }
2145 
2146   if (!dry_run) {
2147     av1_foreach_transformed_block(xd, bsize, mi_row, mi_col,
2148                                   av1_update_and_record_txb_context, &arg,
2149                                   num_planes);
2150   } else if (dry_run == DRY_RUN_NORMAL) {
2151     av1_foreach_transformed_block(xd, bsize, mi_row, mi_col,
2152                                   av1_update_txb_context_b, &arg, num_planes);
2153   } else {
2154     printf("DRY_RUN_COSTCOEFFS is not supported yet\n");
2155     assert(0);
2156   }
2157 }
2158