1 /*
2  *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include <assert.h>
12 #include <stdio.h>
13 #include <limits.h>
14 
15 #include "vpx/vpx_encoder.h"
16 #include "vpx_dsp/bitwriter_buffer.h"
17 #include "vpx_dsp/vpx_dsp_common.h"
18 #include "vpx_mem/vpx_mem.h"
19 #include "vpx_ports/mem_ops.h"
20 #include "vpx_ports/system_state.h"
21 
22 #include "vp9/common/vp9_entropy.h"
23 #include "vp9/common/vp9_entropymode.h"
24 #include "vp9/common/vp9_entropymv.h"
25 #include "vp9/common/vp9_mvref_common.h"
26 #include "vp9/common/vp9_pred_common.h"
27 #include "vp9/common/vp9_seg_common.h"
28 #include "vp9/common/vp9_tile_common.h"
29 
30 #include "vp9/encoder/vp9_cost.h"
31 #include "vp9/encoder/vp9_bitstream.h"
32 #include "vp9/encoder/vp9_encodemv.h"
33 #include "vp9/encoder/vp9_mcomp.h"
34 #include "vp9/encoder/vp9_segmentation.h"
35 #include "vp9/encoder/vp9_subexp.h"
36 #include "vp9/encoder/vp9_tokenize.h"
37 
38 static const struct vp9_token intra_mode_encodings[INTRA_MODES] = {
39   { 0, 1 },  { 6, 3 },   { 28, 5 },  { 30, 5 }, { 58, 6 },
40   { 59, 6 }, { 126, 7 }, { 127, 7 }, { 62, 6 }, { 2, 2 }
41 };
42 static const struct vp9_token switchable_interp_encodings[SWITCHABLE_FILTERS] =
43     { { 0, 1 }, { 2, 2 }, { 3, 2 } };
44 static const struct vp9_token partition_encodings[PARTITION_TYPES] = {
45   { 0, 1 }, { 2, 2 }, { 6, 3 }, { 7, 3 }
46 };
47 static const struct vp9_token inter_mode_encodings[INTER_MODES] = {
48   { 2, 2 }, { 6, 3 }, { 0, 1 }, { 7, 3 }
49 };
50 
// Writes |mode| to |w| as a token of vp9_intra_mode_tree, using the node
// probabilities in |probs|.
static void write_intra_mode(vpx_writer *w, PREDICTION_MODE mode,
                             const vpx_prob *probs) {
  const struct vp9_token *const token = intra_mode_encodings + mode;
  vp9_write_token(w, vp9_intra_mode_tree, probs, token);
}
55 
// Writes the inter prediction |mode| to |w| as a token of vp9_inter_mode_tree,
// using the node probabilities in |probs|. |mode| must satisfy
// is_inter_mode().
static void write_inter_mode(vpx_writer *w, PREDICTION_MODE mode,
                             const vpx_prob *probs) {
  const struct vp9_token *token;
  assert(is_inter_mode(mode));
  // The encoding table is indexed relative to the first inter mode.
  token = inter_mode_encodings + INTER_OFFSET(mode);
  vp9_write_token(w, vp9_inter_mode_tree, probs, token);
}
62 
// Writes |data| to |wb| as a literal whose width is get_unsigned_bits(max).
static void encode_unsigned_max(struct vpx_write_bit_buffer *wb, int data,
                                int max) {
  const int num_bits = get_unsigned_bits(max);
  vpx_wb_write_literal(wb, data, num_bits);
}
67 
// Derives per-node branch counts for |tree| from |counts| and then offers a
// conditional differential update for each of the n - 1 entries in |probs|.
// |n| must not exceed 32 (size of the local branch-count buffer).
static void prob_diff_update(const vpx_tree_index *tree,
                             vpx_prob probs[/*n - 1*/],
                             const unsigned int counts[/*n - 1*/], int n,
                             vpx_writer *w) {
  unsigned int branch_ct[32][2];
  const int num_nodes = n - 1;
  int node;

  // The fixed-size buffer above bounds the number of probabilities.
  assert(n <= 32);

  vp9_tree_probs_from_distribution(tree, branch_ct, counts);
  for (node = 0; node < num_nodes; ++node) {
    vp9_cond_prob_diff_update(w, probs + node, branch_ct[node]);
  }
}
82 
// Writes the transform size of the current block (xd->mi[0]) as up to three
// binary decisions. Decisions stop as soon as the size is identified or the
// largest size allowed for the block size has been reached.
static void write_selected_tx_size(const VP9_COMMON *cm,
                                   const MACROBLOCKD *const xd, vpx_writer *w) {
  const MODE_INFO *const mi = xd->mi[0];
  const TX_SIZE tx_size = mi->tx_size;
  const TX_SIZE max_tx_size = max_txsize_lookup[mi->sb_type];
  const vpx_prob *const tx_probs =
      get_tx_probs(max_tx_size, get_tx_size_context(xd), &cm->fc->tx_probs);

  // First decision: 4x4 or larger.
  vpx_write(w, tx_size != TX_4X4, tx_probs[0]);
  if (tx_size == TX_4X4 || max_tx_size < TX_16X16) return;

  // Second decision: 8x8 or larger.
  vpx_write(w, tx_size != TX_8X8, tx_probs[1]);
  if (tx_size == TX_8X8 || max_tx_size < TX_32X32) return;

  // Third decision: 16x16 or 32x32.
  vpx_write(w, tx_size != TX_16X16, tx_probs[2]);
}
97 
// Writes the skip flag of |mi| unless the segment has SEG_LVL_SKIP active, in
// which case nothing is written. Returns the effective skip value: 1 when the
// segment feature is active, otherwise mi->skip.
static int write_skip(const VP9_COMMON *cm, const MACROBLOCKD *const xd,
                      int segment_id, const MODE_INFO *mi, vpx_writer *w) {
  int skip;

  if (segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP)) return 1;

  skip = mi->skip;
  vpx_write(w, skip, vp9_get_skip_prob(cm, xd));
  return skip;
}
108 
// Offers a conditional differential update for the skip probability of every
// skip context, based on the counts gathered in |counts|.
static void update_skip_probs(VP9_COMMON *cm, vpx_writer *w,
                              FRAME_COUNTS *counts) {
  int ctx;

  for (ctx = 0; ctx < SKIP_CONTEXTS; ++ctx) {
    vp9_cond_prob_diff_update(w, cm->fc->skip_probs + ctx, counts->skip[ctx]);
  }
}
116 
// Runs prob_diff_update() over the switchable interpolation filter
// probabilities of every filter context.
static void update_switchable_interp_probs(VP9_COMMON *cm, vpx_writer *w,
                                           FRAME_COUNTS *counts) {
  int ctx;

  for (ctx = 0; ctx < SWITCHABLE_FILTER_CONTEXTS; ++ctx) {
    prob_diff_update(vp9_switchable_interp_tree,
                     cm->fc->switchable_interp_prob[ctx],
                     counts->switchable_interp[ctx], SWITCHABLE_FILTERS, w);
  }
}
125 
// Writes the coefficient tokens starting at *tp to |w|, stopping at |stop|
// (exclusive) or at the first EOSB_TOKEN, whichever comes first.
//
// For every token the following is emitted:
//   - EOB_TOKEN:  a 0 coded with context_tree[0].
//   - otherwise:  a 1 coded with context_tree[0], then one 0 (coded with
//                 context_tree[1]) per ZERO_TOKEN in the run, then a 1 coded
//                 with context_tree[1] for the non-zero token, followed by
//                 the token itself, its extra bits (if any) and a final bit
//                 taken from the lowest bit of |extra|.
//
// On return *tp points at the first token that was not consumed; an
// EOSB_TOKEN terminator inside the range is stepped over.
//
// |bit_depth| selects the extra-bits table in high bit depth builds and is
// ignored otherwise.
static void pack_mb_tokens(vpx_writer *w, TOKENEXTRA **tp,
                           const TOKENEXTRA *const stop,
                           vpx_bit_depth_t bit_depth) {
  const TOKENEXTRA *p;
  const vp9_extra_bit *const extra_bits =
#if CONFIG_VP9_HIGHBITDEPTH
      (bit_depth == VPX_BITS_12)
          ? vp9_extra_bits_high12
          : (bit_depth == VPX_BITS_10) ? vp9_extra_bits_high10 : vp9_extra_bits;
#else
      vp9_extra_bits;
  (void)bit_depth;
#endif  // CONFIG_VP9_HIGHBITDEPTH

  for (p = *tp; p < stop && p->token != EOSB_TOKEN; ++p) {
    if (p->token == EOB_TOKEN) {
      vpx_write(w, 0, p->context_tree[0]);
      continue;
    }
    vpx_write(w, 1, p->context_tree[0]);
    while (p->token == ZERO_TOKEN) {
      vpx_write(w, 0, p->context_tree[1]);
      ++p;
      if (p == stop || p->token == EOSB_TOKEN) {
        // Only inspect the token when p is still inside [*tp, stop); |stop|
        // itself is one past the last valid element and must not be read.
        *tp = (TOKENEXTRA *)(uintptr_t)p +
              (p < stop && p->token == EOSB_TOKEN);
        return;
      }
    }

    {
      const int t = p->token;
      const vpx_prob *const context_tree = p->context_tree;
      assert(t != ZERO_TOKEN);
      assert(t != EOB_TOKEN);
      assert(t != EOSB_TOKEN);
      vpx_write(w, 1, context_tree[1]);
      if (t == ONE_TOKEN) {
        vpx_write(w, 0, context_tree[2]);
        vpx_write_bit(w, p->extra & 1);
      } else {  // t >= TWO_TOKEN && t < EOB_TOKEN
        const struct vp9_token *const a = &vp9_coef_encodings[t];
        const int v = a->value;
        const int n = a->len;
        const int e = p->extra;
        vpx_write(w, 1, context_tree[2]);
        vp9_write_tree(w, vp9_coef_con_tree,
                       vp9_pareto8_full[context_tree[PIVOT_NODE] - 1], v,
                       n - UNCONSTRAINED_NODES, 0);
        if (t >= CATEGORY1_TOKEN) {
          const vp9_extra_bit *const b = &extra_bits[t];
          const unsigned char *pb = b->prob;
          // Everything above the lowest bit of |extra| is written MSB first,
          // each bit with its own probability.
          const int extra_value = e >> 1;
          int bits_left = b->len;  // number of bits to write, assumed nonzero
          do {
            const int bb = (extra_value >> --bits_left) & 1;
            vpx_write(w, bb, *pb++);
          } while (bits_left);
        }
        vpx_write_bit(w, e & 1);
      }
    }
  }
  // The loop can end with p == stop; guard the read for the same reason as
  // in the early-exit path above.
  *tp = (TOKENEXTRA *)(uintptr_t)p + (p < stop && p->token == EOSB_TOKEN);
}
190 
// Writes |segment_id| through vp9_segment_tree when segmentation is enabled
// and the segment map is being updated; otherwise writes nothing.
static void write_segment_id(vpx_writer *w, const struct segmentation *seg,
                             int segment_id) {
  if (!seg->enabled || !seg->update_map) return;

  vp9_write_tree(w, vp9_segment_tree, seg->tree_probs, segment_id, 3, 0);
}
196 
197 // This function encodes the reference frame
write_ref_frames(const VP9_COMMON * cm,const MACROBLOCKD * const xd,vpx_writer * w)198 static void write_ref_frames(const VP9_COMMON *cm, const MACROBLOCKD *const xd,
199                              vpx_writer *w) {
200   const MODE_INFO *const mi = xd->mi[0];
201   const int is_compound = has_second_ref(mi);
202   const int segment_id = mi->segment_id;
203 
204   // If segment level coding of this signal is disabled...
205   // or the segment allows multiple reference frame options
206   if (segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME)) {
207     assert(!is_compound);
208     assert(mi->ref_frame[0] ==
209            get_segdata(&cm->seg, segment_id, SEG_LVL_REF_FRAME));
210   } else {
211     // does the feature use compound prediction or not
212     // (if not specified at the frame/segment level)
213     if (cm->reference_mode == REFERENCE_MODE_SELECT) {
214       vpx_write(w, is_compound, vp9_get_reference_mode_prob(cm, xd));
215     } else {
216       assert((!is_compound) == (cm->reference_mode == SINGLE_REFERENCE));
217     }
218 
219     if (is_compound) {
220       const int idx = cm->ref_frame_sign_bias[cm->comp_fixed_ref];
221       vpx_write(w, mi->ref_frame[!idx] == cm->comp_var_ref[1],
222                 vp9_get_pred_prob_comp_ref_p(cm, xd));
223     } else {
224       const int bit0 = mi->ref_frame[0] != LAST_FRAME;
225       vpx_write(w, bit0, vp9_get_pred_prob_single_ref_p1(cm, xd));
226       if (bit0) {
227         const int bit1 = mi->ref_frame[0] != GOLDEN_FRAME;
228         vpx_write(w, bit1, vp9_get_pred_prob_single_ref_p2(cm, xd));
229       }
230     }
231   }
232 }
233 
// Writes the mode information of the current block (xd->mi[0]) for a frame
// that is not intra-only, in this order: segment id (when the map is
// updated), skip flag, intra/inter flag, transform size, and then either the
// intra modes or the reference frames, inter mode(s), interpolation filter
// and new motion vectors.
//
// |interp_filter_selected| row 0 is incremented for the chosen filter when
// the frame-level filter is SWITCHABLE. |max_mv_magnitude| is passed through
// to vp9_encode_mv().
static void pack_inter_mode_mvs(
    VP9_COMP *cpi, const MACROBLOCKD *const xd,
    const MB_MODE_INFO_EXT *const mbmi_ext, vpx_writer *w,
    unsigned int *const max_mv_magnitude,
    int interp_filter_selected[MAX_REF_FRAMES][SWITCHABLE]) {
  VP9_COMMON *const cm = &cpi->common;
  const struct segmentation *const seg = &cm->seg;
  const nmv_context *const mv_ctx = &cm->fc->nmvc;
  const MODE_INFO *const mi = xd->mi[0];
  const PREDICTION_MODE mode = mi->mode;
  const BLOCK_SIZE bsize = mi->sb_type;
  const int segment_id = mi->segment_id;
  const int use_hp = cm->allow_high_precision_mv;
  const int is_inter = is_inter_block(mi);
  const int num_refs = 1 + has_second_ref(mi);
  const int is_sub8x8 = bsize < BLOCK_8X8;
  int skip, ref;

  if (seg->update_map) {
    int write_id = 1;
    if (seg->temporal_update) {
      // With temporal prediction the id is only written explicitly when the
      // prediction flag is not set.
      const int pred_flag = mi->seg_id_predicted;
      const vpx_prob pred_prob = vp9_get_pred_prob_seg_id(seg, xd);
      vpx_write(w, pred_flag, pred_prob);
      write_id = !pred_flag;
    }
    if (write_id) write_segment_id(w, seg, segment_id);
  }

  skip = write_skip(cm, xd, segment_id, mi, w);

  if (!segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) {
    vpx_write(w, is_inter, vp9_get_intra_inter_prob(cm, xd));
  }

  // Skipped inter blocks do not signal a transform size.
  if (!is_sub8x8 && cm->tx_mode == TX_MODE_SELECT && (!is_inter || !skip)) {
    write_selected_tx_size(cm, xd, w);
  }

  if (!is_inter) {
    if (is_sub8x8) {
      const int step_x = num_4x4_blocks_wide_lookup[bsize];
      const int step_y = num_4x4_blocks_high_lookup[bsize];
      int bx, by;
      for (by = 0; by < 2; by += step_y) {
        for (bx = 0; bx < 2; bx += step_x) {
          write_intra_mode(w, mi->bmi[by * 2 + bx].as_mode,
                           cm->fc->y_mode_prob[0]);
        }
      }
    } else {
      write_intra_mode(w, mode, cm->fc->y_mode_prob[size_group_lookup[bsize]]);
    }
    write_intra_mode(w, mi->uv_mode, cm->fc->uv_mode_prob[mode]);
  } else {
    const int mode_ctx = mbmi_ext->mode_context[mi->ref_frame[0]];
    const vpx_prob *const inter_probs = cm->fc->inter_mode_probs[mode_ctx];

    write_ref_frames(cm, xd, w);

    // The block-level mode is coded only for 8x8 and larger blocks, and only
    // when segment-level skip is not active.
    if (!is_sub8x8 && !segfeature_active(seg, segment_id, SEG_LVL_SKIP)) {
      write_inter_mode(w, mode, inter_probs);
    }

    if (cm->interp_filter == SWITCHABLE) {
      const int filter_ctx = get_pred_context_switchable_interp(xd);
      vp9_write_token(w, vp9_switchable_interp_tree,
                      cm->fc->switchable_interp_prob[filter_ctx],
                      &switchable_interp_encodings[mi->interp_filter]);
      ++interp_filter_selected[0][mi->interp_filter];
    } else {
      assert(mi->interp_filter == cm->interp_filter);
    }

    if (is_sub8x8) {
      const int step_x = num_4x4_blocks_wide_lookup[bsize];
      const int step_y = num_4x4_blocks_high_lookup[bsize];
      int bx, by;
      for (by = 0; by < 2; by += step_y) {
        for (bx = 0; bx < 2; bx += step_x) {
          const int blk = by * 2 + bx;
          const PREDICTION_MODE b_mode = mi->bmi[blk].as_mode;
          write_inter_mode(w, b_mode, inter_probs);
          if (b_mode != NEWMV) continue;
          for (ref = 0; ref < num_refs; ++ref) {
            vp9_encode_mv(cpi, w, &mi->bmi[blk].as_mv[ref].as_mv,
                          &mbmi_ext->ref_mvs[mi->ref_frame[ref]][0].as_mv,
                          mv_ctx, use_hp, max_mv_magnitude);
          }
        }
      }
    } else if (mode == NEWMV) {
      for (ref = 0; ref < num_refs; ++ref) {
        vp9_encode_mv(cpi, w, &mi->mv[ref].as_mv,
                      &mbmi_ext->ref_mvs[mi->ref_frame[ref]][0].as_mv, mv_ctx,
                      use_hp, max_mv_magnitude);
      }
    }
  }
}
336 
// Writes the mode information of the current block (xd->mi[0]) for an
// intra-only frame: segment id (when the map is updated), skip flag,
// transform size, luma mode(s) and chroma mode. Luma mode probabilities come
// from get_y_mode_probs() with the above and left neighbours.
static void write_mb_modes_kf(const VP9_COMMON *cm, const MACROBLOCKD *xd,
                              vpx_writer *w) {
  const MODE_INFO *const mi = xd->mi[0];
  const MODE_INFO *const above = xd->above_mi;
  const MODE_INFO *const left = xd->left_mi;
  const struct segmentation *const seg = &cm->seg;
  const BLOCK_SIZE bsize = mi->sb_type;
  const int is_sub8x8 = bsize < BLOCK_8X8;

  if (seg->update_map) write_segment_id(w, seg, mi->segment_id);

  write_skip(cm, xd, mi->segment_id, mi, w);

  if (!is_sub8x8 && cm->tx_mode == TX_MODE_SELECT) {
    write_selected_tx_size(cm, xd, w);
  }

  if (is_sub8x8) {
    // One luma mode per sub-block of the 8x8 area.
    const int step_x = num_4x4_blocks_wide_lookup[bsize];
    const int step_y = num_4x4_blocks_high_lookup[bsize];
    int bx, by;

    for (by = 0; by < 2; by += step_y) {
      for (bx = 0; bx < 2; bx += step_x) {
        const int blk = by * 2 + bx;
        write_intra_mode(w, mi->bmi[blk].as_mode,
                         get_y_mode_probs(mi, above, left, blk));
      }
    }
  } else {
    write_intra_mode(w, mi->mode, get_y_mode_probs(mi, above, left, 0));
  }

  write_intra_mode(w, mi->uv_mode, vp9_kf_uv_mode_prob[mi->mode]);
}
370 
// Writes one block at (mi_row, mi_col): points xd->mi at the block, sets up
// the row/column state via set_mi_row_col(), writes the mode information
// (intra-only or inter variant depending on the frame) and then the block's
// coefficient tokens taken from *tok.
static void write_modes_b(
    VP9_COMP *cpi, MACROBLOCKD *const xd, const TileInfo *const tile,
    vpx_writer *w, TOKENEXTRA **tok, const TOKENEXTRA *const tok_end,
    int mi_row, int mi_col, unsigned int *const max_mv_magnitude,
    int interp_filter_selected[MAX_REF_FRAMES][SWITCHABLE]) {
  const VP9_COMMON *const cm = &cpi->common;
  const MB_MODE_INFO_EXT *const mbmi_ext =
      cpi->td.mb.mbmi_ext_base + (mi_row * cm->mi_cols + mi_col);
  BLOCK_SIZE bsize;

  xd->mi = cm->mi_grid_visible + (mi_row * cm->mi_stride + mi_col);
  bsize = xd->mi[0]->sb_type;

  set_mi_row_col(xd, tile, mi_row, num_8x8_blocks_high_lookup[bsize], mi_col,
                 num_8x8_blocks_wide_lookup[bsize], cm->mi_rows, cm->mi_cols);

  if (!frame_is_intra_only(cm)) {
    pack_inter_mode_mvs(cpi, xd, mbmi_ext, w, max_mv_magnitude,
                        interp_filter_selected);
  } else {
    write_mb_modes_kf(cm, xd, w);
  }

  assert(*tok < tok_end);
  pack_mb_tokens(w, tok, tok_end, cm->bit_depth);
}
397 
// Writes partition type |p| for the block at (mi_row, mi_col). |hbs| is the
// offset, in mode-info units, used to test whether the lower and right halves
// of the block lie inside the frame. When one half is outside only a single
// split/no-split bit is written; when both are outside nothing is written.
static void write_partition(const VP9_COMMON *const cm,
                            const MACROBLOCKD *const xd, int hbs, int mi_row,
                            int mi_col, PARTITION_TYPE p, BLOCK_SIZE bsize,
                            vpx_writer *w) {
  const int ctx = partition_plane_context(xd, mi_row, mi_col, bsize);
  const vpx_prob *const probs = xd->partition_probs[ctx];
  const int has_rows = (mi_row + hbs) < cm->mi_rows;
  const int has_cols = (mi_col + hbs) < cm->mi_cols;
  const int is_split = (p == PARTITION_SPLIT);

  if (has_rows && has_cols) {
    // Fully inside the frame: code the complete partition token.
    vp9_write_token(w, vp9_partition_tree, probs, &partition_encodings[p]);
  } else if (has_cols) {
    // Bottom half is outside the frame.
    assert(p == PARTITION_SPLIT || p == PARTITION_HORZ);
    vpx_write(w, is_split, probs[1]);
  } else if (has_rows) {
    // Right half is outside the frame.
    assert(p == PARTITION_SPLIT || p == PARTITION_VERT);
    vpx_write(w, is_split, probs[2]);
  } else {
    // Both halves are outside: the partition is implied.
    assert(p == PARTITION_SPLIT);
  }
}
419 
// Recursively writes the partition structure and all blocks of the square
// region of size |bsize| whose top-left corner is (mi_row, mi_col). Regions
// starting outside the frame are ignored. After the region has been written
// the partition context is updated, except for split partitions above 8x8
// (their sub-regions have already updated it).
static void write_modes_sb(
    VP9_COMP *cpi, MACROBLOCKD *const xd, const TileInfo *const tile,
    vpx_writer *w, TOKENEXTRA **tok, const TOKENEXTRA *const tok_end,
    int mi_row, int mi_col, BLOCK_SIZE bsize,
    unsigned int *const max_mv_magnitude,
    int interp_filter_selected[MAX_REF_FRAMES][SWITCHABLE]) {
  const VP9_COMMON *const cm = &cpi->common;
  const int bsl = b_width_log2_lookup[bsize];
  const int hbs = (1 << bsl) / 4;
  const MODE_INFO *mi;
  PARTITION_TYPE partition;
  BLOCK_SIZE subsize;

  if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return;

  mi = cm->mi_grid_visible[mi_row * cm->mi_stride + mi_col];
  partition = partition_lookup[bsl][mi->sb_type];
  write_partition(cm, xd, hbs, mi_row, mi_col, partition, bsize, w);
  subsize = get_subsize(bsize, partition);

  if (subsize < BLOCK_8X8 || partition == PARTITION_NONE) {
    // A single write covers the whole region.
    write_modes_b(cpi, xd, tile, w, tok, tok_end, mi_row, mi_col,
                  max_mv_magnitude, interp_filter_selected);
  } else if (partition == PARTITION_HORZ) {
    write_modes_b(cpi, xd, tile, w, tok, tok_end, mi_row, mi_col,
                  max_mv_magnitude, interp_filter_selected);
    // The lower half is written only when it starts inside the frame.
    if (mi_row + hbs < cm->mi_rows) {
      write_modes_b(cpi, xd, tile, w, tok, tok_end, mi_row + hbs, mi_col,
                    max_mv_magnitude, interp_filter_selected);
    }
  } else if (partition == PARTITION_VERT) {
    write_modes_b(cpi, xd, tile, w, tok, tok_end, mi_row, mi_col,
                  max_mv_magnitude, interp_filter_selected);
    // The right half is written only when it starts inside the frame.
    if (mi_col + hbs < cm->mi_cols) {
      write_modes_b(cpi, xd, tile, w, tok, tok_end, mi_row, mi_col + hbs,
                    max_mv_magnitude, interp_filter_selected);
    }
  } else {
    int quad;
    assert(partition == PARTITION_SPLIT);
    // Quadrants in order: top-left, top-right, bottom-left, bottom-right.
    for (quad = 0; quad < 4; ++quad) {
      const int sub_row = mi_row + (quad >> 1) * hbs;
      const int sub_col = mi_col + (quad & 1) * hbs;
      write_modes_sb(cpi, xd, tile, w, tok, tok_end, sub_row, sub_col, subsize,
                     max_mv_magnitude, interp_filter_selected);
    }
  }

  // update partition context
  if (bsize >= BLOCK_8X8 &&
      (bsize == BLOCK_8X8 || partition != PARTITION_SPLIT)) {
    update_partition_context(xd, mi_row, mi_col, subsize, bsize);
  }
}
482 
// Writes every 64x64 superblock of |tile|. For each superblock row the token
// range is taken from cpi->tplist[tile_row][tile_col], the left segmentation
// context is cleared, and the superblocks are written left to right.
static void write_modes(
    VP9_COMP *cpi, MACROBLOCKD *const xd, const TileInfo *const tile,
    vpx_writer *w, int tile_row, int tile_col,
    unsigned int *const max_mv_magnitude,
    int interp_filter_selected[MAX_REF_FRAMES][SWITCHABLE]) {
  const VP9_COMMON *const cm = &cpi->common;
  int mi_row;

  set_partition_probs(cm, xd);

  for (mi_row = tile->mi_row_start; mi_row < tile->mi_row_end;
       mi_row += MI_BLOCK_SIZE) {
    // Index of this superblock row relative to the top of the tile.
    const int sb_row = mi_cols_aligned_to_sb(mi_row - tile->mi_row_start) >>
                       MI_BLOCK_SIZE_LOG2;
    TOKENEXTRA *tok = cpi->tplist[tile_row][tile_col][sb_row].start;
    const TOKENEXTRA *const tok_end =
        tok + cpi->tplist[tile_row][tile_col][sb_row].count;
    int mi_col;

    vp9_zero(xd->left_seg_context);
    for (mi_col = tile->mi_col_start; mi_col < tile->mi_col_end;
         mi_col += MI_BLOCK_SIZE) {
      write_modes_sb(cpi, xd, tile, w, &tok, tok_end, mi_row, mi_col,
                     BLOCK_64X64, max_mv_magnitude, interp_filter_selected);
    }

    // All tokens recorded for this row must have been consumed.
    assert(tok == cpi->tplist[tile_row][tile_col][sb_row].stop);
  }
}
511 
// For transform size |tx_size|, converts the coefficient counts in
// cpi->td.rd_counts into per-node branch counts (|coef_branch_ct|) and model
// probabilities (|coef_probs|). The second count of node 0 is replaced by the
// EOB branch count minus the node's first count before probabilities are
// derived.
static void build_tree_distribution(VP9_COMP *cpi, TX_SIZE tx_size,
                                    vp9_coeff_stats *coef_branch_ct,
                                    vp9_coeff_probs_model *coef_probs) {
  vp9_coeff_count *coef_counts = cpi->td.rd_counts.coef_counts[tx_size];
  unsigned int(*eob_branch_ct)[REF_TYPES][COEF_BANDS][COEFF_CONTEXTS] =
      cpi->common.counts.eob_branch[tx_size];
  int plane, ref, band, ctx, node;

  for (plane = 0; plane < PLANE_TYPES; ++plane) {
    for (ref = 0; ref < REF_TYPES; ++ref) {
      for (band = 0; band < COEF_BANDS; ++band) {
        for (ctx = 0; ctx < BAND_COEFF_CONTEXTS(band); ++ctx) {
          unsigned int(*const branch)[2] =
              coef_branch_ct[plane][ref][band][ctx];
          vpx_prob *const probs = coef_probs[plane][ref][band][ctx];

          vp9_tree_probs_from_distribution(vp9_coef_tree, branch,
                                           coef_counts[plane][ref][band][ctx]);
          branch[0][1] = eob_branch_ct[plane][ref][band][ctx] - branch[0][0];
          for (node = 0; node < UNCONSTRAINED_NODES; ++node) {
            probs[node] = get_binary_prob(branch[node][0], branch[node][1]);
          }
        }
      }
    }
  }
}
538 
// Writes the coefficient probability updates for |tx_size| to |bc| and stores
// every transmitted probability into the frame context.
//
// Two strategies, selected by cpi->sf.use_fast_coef_updates:
//   TWO_LOOP - a dry run totals the savings; if nothing would be updated or
//              the total is negative a single 0 bit is written. Otherwise a 1
//              bit is written followed by one update flag (and, when set, the
//              new probability) per node.
//   other    - (asserted to be ONE_LOOP_REDUCED) single pass. Flags are held
//              back until the first update is found; then a 1 bit and the
//              held-back 0 flags are written before continuing normally. If
//              no update is found a single 0 bit is written.
static void update_coef_probs_common(vpx_writer *const bc, VP9_COMP *cpi,
                                     TX_SIZE tx_size,
                                     vp9_coeff_stats *frame_branch_ct,
                                     vp9_coeff_probs_model *new_coef_probs) {
  vp9_coeff_probs_model *old_coef_probs = cpi->common.fc->coef_probs[tx_size];
  const vpx_prob upd = DIFF_UPDATE_PROB;
  const int num_nodes = UNCONSTRAINED_NODES;
  const int stepsize = cpi->sf.coeff_prob_appx_step;
  int i, j, k, l, t;

  if (cpi->sf.use_fast_coef_updates == TWO_LOOP) {
    // Cost of one update flag; charged for every node whether or not it is
    // updated.
    const int flag_cost = (int)(vp9_cost_zero(upd));
    int savings = 0;
    int num_updates = 0;

    /* dry run to see if there is any update at all needed */
    for (i = 0; i < PLANE_TYPES; ++i) {
      for (j = 0; j < REF_TYPES; ++j) {
        for (k = 0; k < COEF_BANDS; ++k) {
          for (l = 0; l < BAND_COEFF_CONTEXTS(k); ++l) {
            for (t = 0; t < num_nodes; ++t) {
              vpx_prob newp = new_coef_probs[i][j][k][l][t];
              const vpx_prob oldp = old_coef_probs[i][j][k][l][t];
              int s;

              if (t == PIVOT_NODE) {
                s = vp9_prob_diff_update_savings_search_model(
                    frame_branch_ct[i][j][k][l][0], oldp, &newp, upd,
                    stepsize);
              } else {
                s = vp9_prob_diff_update_savings_search(
                    frame_branch_ct[i][j][k][l][t], oldp, &newp, upd);
              }

              if (s > 0 && newp != oldp) {
                savings += s - flag_cost;
                ++num_updates;
              } else {
                savings -= flag_cost;
              }
            }
          }
        }
      }
    }

    /* Is coef updated at all */
    if (num_updates == 0 || savings < 0) {
      vpx_write_bit(bc, 0);
      return;
    }
    vpx_write_bit(bc, 1);

    for (i = 0; i < PLANE_TYPES; ++i) {
      for (j = 0; j < REF_TYPES; ++j) {
        for (k = 0; k < COEF_BANDS; ++k) {
          for (l = 0; l < BAND_COEFF_CONTEXTS(k); ++l) {
            for (t = 0; t < num_nodes; ++t) {
              vpx_prob newp = new_coef_probs[i][j][k][l][t];
              vpx_prob *const oldp = old_coef_probs[i][j][k][l] + t;
              int s;
              int u;

              if (t == PIVOT_NODE) {
                s = vp9_prob_diff_update_savings_search_model(
                    frame_branch_ct[i][j][k][l][0], *oldp, &newp, upd,
                    stepsize);
              } else {
                s = vp9_prob_diff_update_savings_search(
                    frame_branch_ct[i][j][k][l][t], *oldp, &newp, upd);
              }

              u = (s > 0 && newp != *oldp);
              vpx_write(bc, u, upd);
              if (u) {
                /* send/use new probability */
                vp9_write_prob_diff_update(bc, newp, *oldp);
                *oldp = newp;
              }
            }
          }
        }
      }
    }
  } else {
    int updates = 0;
    int held_back_flags = 0;
    assert(cpi->sf.use_fast_coef_updates == ONE_LOOP_REDUCED);

    for (i = 0; i < PLANE_TYPES; ++i) {
      for (j = 0; j < REF_TYPES; ++j) {
        for (k = 0; k < COEF_BANDS; ++k) {
          for (l = 0; l < BAND_COEFF_CONTEXTS(k); ++l) {
            for (t = 0; t < num_nodes; ++t) {
              vpx_prob newp = new_coef_probs[i][j][k][l][t];
              vpx_prob *const oldp = old_coef_probs[i][j][k][l] + t;
              int s;
              int u;

              if (t == PIVOT_NODE) {
                s = vp9_prob_diff_update_savings_search_model(
                    frame_branch_ct[i][j][k][l][0], *oldp, &newp, upd,
                    stepsize);
              } else {
                s = vp9_prob_diff_update_savings_search(
                    frame_branch_ct[i][j][k][l][t], *oldp, &newp, upd);
              }

              u = (s > 0 && newp != *oldp);
              updates += u;

              // Nothing has been written yet: remember the pending 0 flag.
              if (!u && updates == 0) {
                ++held_back_flags;
                continue;
              }

              // First update: announce it and flush the held-back 0 flags.
              if (u && updates == 1) {
                int v;
                vpx_write_bit(bc, 1);
                for (v = 0; v < held_back_flags; ++v) {
                  vpx_write(bc, 0, upd);
                }
              }

              vpx_write(bc, u, upd);
              if (u) {
                /* send/use new probability */
                vp9_write_prob_diff_update(bc, newp, *oldp);
                *oldp = newp;
              }
            }
          }
        }
      }
    }

    if (updates == 0) {
      vpx_write_bit(bc, 0);  // no updates
    }
  }
}
677 
// Writes coefficient probability updates for every transform size up to the
// largest one allowed by the frame's tx_mode. A size gets a single 0 bit (no
// update) when it has 20 or fewer recorded uses, or when it is 16x16 or
// larger and the transform size search method is USE_TX_8X8.
static void update_coef_probs(VP9_COMP *cpi, vpx_writer *w) {
  const TX_MODE tx_mode = cpi->common.tx_mode;
  const TX_SIZE max_tx_size = tx_mode_to_biggest_tx_size[tx_mode];
  TX_SIZE tx_size;

  for (tx_size = TX_4X4; tx_size <= max_tx_size; ++tx_size) {
    vp9_coeff_stats frame_branch_ct[PLANE_TYPES];
    vp9_coeff_probs_model frame_coef_probs[PLANE_TYPES];
    const int skip_update =
        cpi->td.counts->tx.tx_totals[tx_size] <= 20 ||
        (tx_size >= TX_16X16 && cpi->sf.tx_size_search_method == USE_TX_8X8);

    if (skip_update) {
      vpx_write_bit(w, 0);
      continue;
    }

    build_tree_distribution(cpi, tx_size, frame_branch_ct, frame_coef_probs);
    update_coef_probs_common(w, cpi, tx_size, frame_branch_ct,
                             frame_coef_probs);
  }
}
695 
encode_loopfilter(struct loopfilter * lf,struct vpx_write_bit_buffer * wb)696 static void encode_loopfilter(struct loopfilter *lf,
697                               struct vpx_write_bit_buffer *wb) {
698   int i;
699 
700   // Encode the loop filter level and type
701   vpx_wb_write_literal(wb, lf->filter_level, 6);
702   vpx_wb_write_literal(wb, lf->sharpness_level, 3);
703 
704   // Write out loop filter deltas applied at the MB level based on mode or
705   // ref frame (if they are enabled).
706   vpx_wb_write_bit(wb, lf->mode_ref_delta_enabled);
707 
708   if (lf->mode_ref_delta_enabled) {
709     vpx_wb_write_bit(wb, lf->mode_ref_delta_update);
710     if (lf->mode_ref_delta_update) {
711       for (i = 0; i < MAX_REF_LF_DELTAS; i++) {
712         const int delta = lf->ref_deltas[i];
713         const int changed = delta != lf->last_ref_deltas[i];
714         vpx_wb_write_bit(wb, changed);
715         if (changed) {
716           lf->last_ref_deltas[i] = delta;
717           vpx_wb_write_literal(wb, abs(delta) & 0x3F, 6);
718           vpx_wb_write_bit(wb, delta < 0);
719         }
720       }
721 
722       for (i = 0; i < MAX_MODE_LF_DELTAS; i++) {
723         const int delta = lf->mode_deltas[i];
724         const int changed = delta != lf->last_mode_deltas[i];
725         vpx_wb_write_bit(wb, changed);
726         if (changed) {
727           lf->last_mode_deltas[i] = delta;
728           vpx_wb_write_literal(wb, abs(delta) & 0x3F, 6);
729           vpx_wb_write_bit(wb, delta < 0);
730         }
731       }
732     }
733   }
734 }
735 
// Writes one quantizer delta: a presence bit, and when the delta is non-zero
// a 4-bit magnitude followed by a sign bit (1 for negative).
static void write_delta_q(struct vpx_write_bit_buffer *wb, int delta_q) {
  if (delta_q == 0) {
    vpx_wb_write_bit(wb, 0);
    return;
  }

  vpx_wb_write_bit(wb, 1);
  vpx_wb_write_literal(wb, abs(delta_q), 4);
  vpx_wb_write_bit(wb, delta_q < 0);
}
745 
encode_quantization(const VP9_COMMON * const cm,struct vpx_write_bit_buffer * wb)746 static void encode_quantization(const VP9_COMMON *const cm,
747                                 struct vpx_write_bit_buffer *wb) {
748   vpx_wb_write_literal(wb, cm->base_qindex, QINDEX_BITS);
749   write_delta_q(wb, cm->y_dc_delta_q);
750   write_delta_q(wb, cm->uv_dc_delta_q);
751   write_delta_q(wb, cm->uv_ac_delta_q);
752 }
753 
// Writes the segmentation parameters to the bit buffer.
//
// Layout: the enabled flag (nothing else when disabled); the update_map flag
// and, when set, the tree probabilities, the temporal_update flag and
// optionally the prediction probabilities; then the update_data flag and,
// when set, the abs_delta flag plus one "active" bit per (segment, feature)
// pair followed by that feature's data for active pairs.
//
// Each probability is coded as an update bit that is set when the value
// differs from MAX_PROB, followed by the 8-bit value when the bit is set.
static void encode_segmentation(VP9_COMMON *cm, MACROBLOCKD *xd,
                                struct vpx_write_bit_buffer *wb) {
  const struct segmentation *seg = &cm->seg;
  int segment_id, feature_id, p;

  vpx_wb_write_bit(wb, seg->enabled);
  if (!seg->enabled) return;

  // Segmentation map.
  vpx_wb_write_bit(wb, seg->update_map);
  if (seg->update_map) {
    // Pick temporal or spatial coding before any probability is read below.
    vp9_choose_segmap_coding_method(cm, xd);

    // Probabilities used to decode unpredicted segment ids.
    for (p = 0; p < SEG_TREE_PROBS; ++p) {
      const int tree_prob = seg->tree_probs[p];
      if (tree_prob != MAX_PROB) {
        vpx_wb_write_bit(wb, 1);
        vpx_wb_write_literal(wb, tree_prob, 8);
      } else {
        vpx_wb_write_bit(wb, 0);
      }
    }

    // The coding method that was chosen.
    vpx_wb_write_bit(wb, seg->temporal_update);
    if (seg->temporal_update) {
      for (p = 0; p < PREDICTION_PROBS; ++p) {
        const int pred_prob = seg->pred_probs[p];
        if (pred_prob != MAX_PROB) {
          vpx_wb_write_bit(wb, 1);
          vpx_wb_write_literal(wb, pred_prob, 8);
        } else {
          vpx_wb_write_bit(wb, 0);
        }
      }
    }
  }

  // Segmentation feature data.
  vpx_wb_write_bit(wb, seg->update_data);
  if (!seg->update_data) return;

  vpx_wb_write_bit(wb, seg->abs_delta);

  for (segment_id = 0; segment_id < MAX_SEGMENTS; ++segment_id) {
    for (feature_id = 0; feature_id < SEG_LVL_MAX; ++feature_id) {
      const int active = segfeature_active(seg, segment_id, feature_id);
      int value, value_max;

      vpx_wb_write_bit(wb, active);
      if (!active) continue;

      value = get_segdata(seg, segment_id, feature_id);
      value_max = vp9_seg_feature_data_max(feature_id);

      if (vp9_is_segfeature_signed(feature_id)) {
        // Signed features are coded as magnitude followed by a sign bit.
        encode_unsigned_max(wb, abs(value), value_max);
        vpx_wb_write_bit(wb, value < 0);
      } else {
        encode_unsigned_max(wb, value, value_max);
      }
    }
  }
}
812 
// Writes the transform mode and, for TX_MODE_SELECT, the transform size
// probability updates.
//
// The mode is coded as a 2-bit literal clamped to ALLOW_32X32; when the mode
// is ALLOW_32X32 or above one more bit tells whether it is TX_MODE_SELECT.
// In TX_MODE_SELECT the per-context tx counts are converted to branch counts
// and each 8x8, 16x16 and 32x32 probability is conditionally updated.
static void encode_txfm_probs(VP9_COMMON *cm, vpx_writer *w,
                              FRAME_COUNTS *counts) {
  unsigned int branch_ct_8x8[TX_SIZES - 3][2];
  unsigned int branch_ct_16x16[TX_SIZES - 2][2];
  unsigned int branch_ct_32x32[TX_SIZES - 1][2];
  int ctx, k;

  // Mode.
  vpx_write_literal(w, VPXMIN(cm->tx_mode, ALLOW_32X32), 2);
  if (cm->tx_mode >= ALLOW_32X32)
    vpx_write_bit(w, cm->tx_mode == TX_MODE_SELECT);

  // Probabilities are only sent when the size is selected per block.
  if (cm->tx_mode != TX_MODE_SELECT) return;

  for (ctx = 0; ctx < TX_SIZE_CONTEXTS; ++ctx) {
    tx_counts_to_branch_counts_8x8(counts->tx.p8x8[ctx], branch_ct_8x8);
    for (k = 0; k < TX_SIZES - 3; ++k) {
      vp9_cond_prob_diff_update(w, &cm->fc->tx_probs.p8x8[ctx][k],
                                branch_ct_8x8[k]);
    }
  }

  for (ctx = 0; ctx < TX_SIZE_CONTEXTS; ++ctx) {
    tx_counts_to_branch_counts_16x16(counts->tx.p16x16[ctx], branch_ct_16x16);
    for (k = 0; k < TX_SIZES - 2; ++k) {
      vp9_cond_prob_diff_update(w, &cm->fc->tx_probs.p16x16[ctx][k],
                                branch_ct_16x16[k]);
    }
  }

  for (ctx = 0; ctx < TX_SIZE_CONTEXTS; ++ctx) {
    tx_counts_to_branch_counts_32x32(counts->tx.p32x32[ctx], branch_ct_32x32);
    for (k = 0; k < TX_SIZES - 1; ++k) {
      vp9_cond_prob_diff_update(w, &cm->fc->tx_probs.p32x32[ctx][k],
                                branch_ct_32x32[k]);
    }
  }
}
848 
// Writes the frame-level interpolation filter: one bit that is set when the
// filter is SWITCHABLE, and otherwise a 2-bit literal looked up from the
// filter value through filter_to_literal[].
static void write_interp_filter(INTERP_FILTER filter,
                                struct vpx_write_bit_buffer *wb) {
  // Indexed by the INTERP_FILTER value; yields the literal that is coded.
  const int filter_to_literal[] = { 1, 0, 2, 3 };
  const int is_switchable = (filter == SWITCHABLE);

  vpx_wb_write_bit(wb, is_switchable);
  if (is_switchable) return;

  vpx_wb_write_literal(wb, filter_to_literal[filter], 2);
}
857 
// If the frame is marked SWITCHABLE but, summed over all contexts, exactly
// one filter has a positive count, replaces cm->interp_filter with that
// filter so it is signalled at frame level. Does nothing otherwise.
static void fix_interp_filter(VP9_COMMON *cm, FRAME_COUNTS *counts) {
  int filter, ctx;
  int filters_in_use = 0;
  int first_used = -1;

  if (cm->interp_filter != SWITCHABLE) return;

  for (filter = 0; filter < SWITCHABLE_FILTERS; ++filter) {
    int total = 0;
    for (ctx = 0; ctx < SWITCHABLE_FILTER_CONTEXTS; ++ctx)
      total += counts->switchable_interp[ctx][filter];

    if (total > 0) ++filters_in_use;
    // Remember the lowest-indexed filter with a non-zero total.
    if (total && first_used < 0) first_used = filter;
  }

  // Only one filter is used, so set it at frame level.
  if (filters_in_use == 1) cm->interp_filter = first_used;
}
880 
write_tile_info(const VP9_COMMON * const cm,struct vpx_write_bit_buffer * wb)881 static void write_tile_info(const VP9_COMMON *const cm,
882                             struct vpx_write_bit_buffer *wb) {
883   int min_log2_tile_cols, max_log2_tile_cols, ones;
884   vp9_get_tile_n_bits(cm->mi_cols, &min_log2_tile_cols, &max_log2_tile_cols);
885 
886   // columns
887   ones = cm->log2_tile_cols - min_log2_tile_cols;
888   while (ones--) vpx_wb_write_bit(wb, 1);
889 
890   if (cm->log2_tile_cols < max_log2_tile_cols) vpx_wb_write_bit(wb, 0);
891 
892   // rows
893   vpx_wb_write_bit(wb, cm->log2_tile_rows != 0);
894   if (cm->log2_tile_rows != 0) vpx_wb_write_bit(wb, cm->log2_tile_rows != 1);
895 }
896 
// Builds the bit mask of reference buffer slots refreshed by this frame.
//
// Side effect: unless vp9_preserve_existing_gf() holds, the ARF slot that
// was selected is stored in cpi->twopass.gf_group.top_arf_idx.
int vp9_get_refresh_mask(VP9_COMP *cpi) {
  GF_GROUP *const gf_group = &cpi->twopass.gf_group;
  int arf_idx;

  if (vp9_preserve_existing_gf(cpi)) {
    // The previously existing golden frame is being preserved as the new ARF.
    // In the short term it stays in the GF slot, and if the GF is being
    // updated with the current decoded frame, that frame is saved to the ARF
    // slot instead. vp9_encoder.c:vp9_update_reference_frames() later swaps
    // gld_fb_idx and alt_fb_idx so that the swap happens outside of the
    // recode loop.
    // Note: This is highly specific to the use of ARF as a forward reference,
    // and needs to be generalized as other uses are implemented (like
    // RTC/temporal scalability).
    return (cpi->refresh_last_frame << cpi->lst_fb_idx) |
           (cpi->refresh_golden_frame << cpi->alt_fb_idx);
  }

  arf_idx = cpi->alt_fb_idx;
  if (cpi->multi_layer_arf) {
    // Take the first slot that is neither LAST/GOLDEN/ALTREF nor present on
    // the ARF index stack.
    for (arf_idx = 0; arf_idx < REF_FRAMES; ++arf_idx) {
      int pos;

      if (arf_idx == cpi->alt_fb_idx || arf_idx == cpi->lst_fb_idx ||
          arf_idx == cpi->gld_fb_idx) {
        continue;
      }

      for (pos = 0; pos < gf_group->stack_size; ++pos) {
        if (gf_group->arf_index_stack[pos] == arf_idx) break;
      }
      if (pos == gf_group->stack_size) break;
    }
  }
  gf_group->top_arf_idx = arf_idx;

  // With SVC bypass mode and an explicit reference configuration the mask
  // comes straight from the per-spatial-layer update_buffer_slot[].
  if (cpi->use_svc && cpi->svc.use_set_ref_frame_config &&
      cpi->svc.temporal_layering_mode == VP9E_TEMPORAL_LAYERING_MODE_BYPASS) {
    return cpi->svc.update_buffer_slot[cpi->svc.spatial_layer_id];
  }

  return (cpi->refresh_last_frame << cpi->lst_fb_idx) |
         (cpi->refresh_golden_frame << cpi->gld_fb_idx) |
         (cpi->refresh_alt_ref_frame << arf_idx);
}
936 
encode_tile_worker(void * arg1,void * arg2)937 static int encode_tile_worker(void *arg1, void *arg2) {
938   VP9_COMP *cpi = (VP9_COMP *)arg1;
939   VP9BitstreamWorkerData *data = (VP9BitstreamWorkerData *)arg2;
940   MACROBLOCKD *const xd = &data->xd;
941   const int tile_row = 0;
942   vpx_start_encode(&data->bit_writer, data->dest);
943   write_modes(cpi, xd, &cpi->tile_data[data->tile_idx].tile_info,
944               &data->bit_writer, tile_row, data->tile_idx,
945               &data->max_mv_magnitude, data->interp_filter_selected);
946   vpx_stop_encode(&data->bit_writer);
947   return 1;
948 }
949 
// Releases the per-worker bitstream data, if any was allocated.
//
// The dest buffers of workers 1..num_workers-1 are freed (worker 0 is
// skipped), then the worker data array itself, and the pointer is reset to
// NULL.
void vp9_bitstream_encode_tiles_buffer_dealloc(VP9_COMP *const cpi) {
  int worker_idx;

  if (!cpi->vp9_bitstream_worker_data) return;

  for (worker_idx = 1; worker_idx < cpi->num_workers; ++worker_idx)
    vpx_free(cpi->vp9_bitstream_worker_data[worker_idx].dest);

  vpx_free(cpi->vp9_bitstream_worker_data);
  cpi->vp9_bitstream_worker_data = NULL;
}
960 
// Allocates the per-worker bitstream data used by encode_tiles_mt().
//
// One zero-initialised VP9BitstreamWorkerData is allocated per worker. Every
// worker except worker 0 additionally gets a dest buffer of
// oxcf.width * oxcf.height bytes; slot 0 gets none here.
//
// Returns 0 on success and 1 when any allocation fails. On failure whatever
// was already allocated stays attached to cpi (dest pointers that were not
// reached remain zeroed by the memset) so that
// vp9_bitstream_encode_tiles_buffer_dealloc() can release it.
static int encode_tiles_buffer_alloc(VP9_COMP *const cpi) {
  int i;
  const size_t worker_data_size =
      cpi->num_workers * sizeof(*cpi->vp9_bitstream_worker_data);

  cpi->vp9_bitstream_worker_data = vpx_memalign(16, worker_data_size);
  // Check the allocation before touching it: clearing through a NULL pointer
  // is undefined behavior.
  if (!cpi->vp9_bitstream_worker_data) return 1;
  memset(cpi->vp9_bitstream_worker_data, 0, worker_data_size);

  for (i = 1; i < cpi->num_workers; ++i) {
    cpi->vp9_bitstream_worker_data[i].dest_size =
        cpi->oxcf.width * cpi->oxcf.height;
    cpi->vp9_bitstream_worker_data[i].dest =
        vpx_malloc(cpi->vp9_bitstream_worker_data[i].dest_size);
    if (!cpi->vp9_bitstream_worker_data[i].dest) return 1;
  }
  return 0;
}
977 
// Packs the tile columns of a frame using the worker threads.
//
// Tiles are handled in batches of at most num_workers. Worker 0 writes
// straight into data_ptr; the other workers write into their own dest
// buffers, which are copied into data_ptr once the worker has been synced.
// Every tile except the very last one is prefixed with its 32-bit big-endian
// size.
//
// Returns the number of bytes written to data_ptr, or 0 when the worker
// buffers cannot be allocated or a worker fails to sync.
static size_t encode_tiles_mt(VP9_COMP *cpi, uint8_t *data_ptr) {
  const VPxWorkerInterface *const winterface = vpx_get_worker_interface();
  VP9_COMMON *const cm = &cpi->common;
  const int tile_cols = 1 << cm->log2_tile_cols;
  const int num_workers = cpi->num_workers;
  const int last_worker = num_workers - 1;
  size_t bytes_written = 0;
  int next_tile = 0;

  // NOTE(review): the buffers are re-created only when the existing dest_size
  // is LARGER than width * height; a frame that grew would keep the smaller
  // buffers. Confirm whether the comparison is intended.
  if (!cpi->vp9_bitstream_worker_data ||
      cpi->vp9_bitstream_worker_data[1].dest_size >
          (cpi->oxcf.width * cpi->oxcf.height)) {
    vp9_bitstream_encode_tiles_buffer_dealloc(cpi);
    if (encode_tiles_buffer_alloc(cpi)) return 0;
  }

  while (next_tile < tile_cols) {
    int launched = 0;
    int w;

    // Hand out one tile per worker until workers or tiles run out.
    while (launched < num_workers && next_tile < tile_cols) {
      VPxWorker *const worker = &cpi->workers[launched];
      VP9BitstreamWorkerData *const job =
          &cpi->vp9_bitstream_worker_data[launched];

      // Populate the worker data.
      job->xd = cpi->td.mb.e_mbd;
      job->tile_idx = next_tile;
      job->max_mv_magnitude = cpi->max_mv_magnitude;
      memset(job->interp_filter_selected, 0,
             sizeof(job->interp_filter_selected[0][0]) * SWITCHABLE);

      // The first worker writes directly into the output buffer, leaving
      // room for the 4-byte size prefix unless this is the last tile.
      if (launched == 0) {
        const size_t prefix_bytes = (next_tile == tile_cols - 1) ? 0 : 4;
        job->dest = data_ptr + bytes_written + prefix_bytes;
      }

      worker->data1 = cpi;
      worker->data2 = job;
      worker->hook = encode_tile_worker;
      worker->had_error = 0;

      // The last worker slot goes through execute(), the others through
      // launch().
      if (launched == last_worker) {
        winterface->execute(worker);
      } else {
        winterface->launch(worker);
      }

      ++next_tile;
      ++launched;
    }

    // Collect the batch in tile order.
    for (w = 0; w < launched; ++w) {
      VPxWorker *const worker = &cpi->workers[w];
      VP9BitstreamWorkerData *const job =
          (VP9BitstreamWorkerData *)worker->data2;
      uint32_t tile_size;
      int filter;

      if (!winterface->sync(worker)) return 0;
      tile_size = job->bit_writer.pos;

      // Aggregate per-thread bitstream stats.
      cpi->max_mv_magnitude =
          VPXMAX(cpi->max_mv_magnitude, job->max_mv_magnitude);
      for (filter = 0; filter < SWITCHABLE; ++filter) {
        cpi->interp_filter_selected[0][filter] +=
            job->interp_filter_selected[0][filter];
      }

      // Prefix the size of the tile on all but the last.
      if (next_tile != tile_cols || w < launched - 1) {
        mem_put_be32(data_ptr + bytes_written, tile_size);
        bytes_written += 4;
      }

      // Worker 0 already wrote in place; the others are copied in.
      if (w > 0) memcpy(data_ptr + bytes_written, job->dest, tile_size);

      bytes_written += tile_size;
    }
  }

  return bytes_written;
}
1055 
// Packs all tiles of the frame into data_ptr and returns the number of bytes
// written.
//
// Every tile except the last one (last row and last column) is preceded by
// its size as a 32-bit big-endian value. In REALTIME mode with more than one
// worker, a single tile row and several tile columns, the work is delegated
// to encode_tiles_mt().
static size_t encode_tiles(VP9_COMP *cpi, uint8_t *data_ptr) {
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
  const int tile_cols = 1 << cm->log2_tile_cols;
  const int tile_rows = 1 << cm->log2_tile_rows;
  vpx_writer residual_bc;
  size_t total_size = 0;
  int tile_row, tile_col;

  memset(cm->above_seg_context, 0,
         sizeof(*cm->above_seg_context) * mi_cols_aligned_to_sb(cm->mi_cols));

  // Encoding tiles in parallel is done only for realtime mode now. In other
  // modes the speed up is insignificant and requires further testing to ensure
  // that it does not make the overall process worse in any case.
  if (cpi->oxcf.mode == REALTIME && cpi->num_workers > 1 && tile_rows == 1 &&
      tile_cols > 1) {
    return encode_tiles_mt(cpi, data_ptr);
  }

  for (tile_row = 0; tile_row < tile_rows; ++tile_row) {
    for (tile_col = 0; tile_col < tile_cols; ++tile_col) {
      const int tile_idx = tile_row * tile_cols + tile_col;
      const int has_size_prefix =
          tile_col < tile_cols - 1 || tile_row < tile_rows - 1;
      // Leave room for the size field in front of the tile data.
      uint8_t *const tile_dest =
          data_ptr + total_size + (has_size_prefix ? 4 : 0);

      vpx_start_encode(&residual_bc, tile_dest);
      write_modes(cpi, xd, &cpi->tile_data[tile_idx].tile_info, &residual_bc,
                  tile_row, tile_col, &cpi->max_mv_magnitude,
                  cpi->interp_filter_selected);
      vpx_stop_encode(&residual_bc);

      if (has_size_prefix) {
        // Size of this tile.
        mem_put_be32(data_ptr + total_size, residual_bc.pos);
        total_size += 4;
      }
      total_size += residual_bc.pos;
    }
  }

  return total_size;
}
1101 
write_render_size(const VP9_COMMON * cm,struct vpx_write_bit_buffer * wb)1102 static void write_render_size(const VP9_COMMON *cm,
1103                               struct vpx_write_bit_buffer *wb) {
1104   const int scaling_active =
1105       cm->width != cm->render_width || cm->height != cm->render_height;
1106   vpx_wb_write_bit(wb, scaling_active);
1107   if (scaling_active) {
1108     vpx_wb_write_literal(wb, cm->render_width - 1, 16);
1109     vpx_wb_write_literal(wb, cm->render_height - 1, 16);
1110   }
1111 }
1112 
write_frame_size(const VP9_COMMON * cm,struct vpx_write_bit_buffer * wb)1113 static void write_frame_size(const VP9_COMMON *cm,
1114                              struct vpx_write_bit_buffer *wb) {
1115   vpx_wb_write_literal(wb, cm->width - 1, 16);
1116   vpx_wb_write_literal(wb, cm->height - 1, 16);
1117 
1118   write_render_size(cm, wb);
1119 }
1120 
// Writes the frame size of an inter frame, reusing a reference frame's size
// where possible.
//
// For LAST, GOLDEN and ALTREF in turn one "found" bit is written, and the
// loop stops at the first reference whose cropped luma size equals the
// current frame size. If none matched, width - 1 and height - 1 are written
// explicitly (16 bits each). The render size always follows.
static void write_frame_size_with_refs(VP9_COMP *cpi,
                                       struct vpx_write_bit_buffer *wb) {
  VP9_COMMON *const cm = &cpi->common;
  MV_REFERENCE_FRAME ref_frame;
  int found = 0;

  for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
    YV12_BUFFER_CONFIG *cfg = get_ref_frame_buffer(cpi, ref_frame);
    // "found" is forced to 0 for temporal svc (in CBR mode) and for spatial
    // svc key frames.
    const int force_not_found =
        cpi->use_svc &&
        ((cpi->svc.number_temporal_layers > 1 &&
          cpi->oxcf.rc_mode == VPX_CBR) ||
         (cpi->svc.number_spatial_layers > 1 &&
          cpi->svc.layer_context[cpi->svc.spatial_layer_id].is_key_frame));

    if (force_not_found) {
      found = 0;
    } else if (cfg != NULL) {
      found = (cm->width == cfg->y_crop_width) &&
              (cm->height == cfg->y_crop_height);
    }

    vpx_wb_write_bit(wb, found);
    if (found) break;
  }

  if (!found) {
    vpx_wb_write_literal(wb, cm->width - 1, 16);
    vpx_wb_write_literal(wb, cm->height - 1, 16);
  }

  write_render_size(cm, wb);
}
1154 
write_sync_code(struct vpx_write_bit_buffer * wb)1155 static void write_sync_code(struct vpx_write_bit_buffer *wb) {
1156   vpx_wb_write_literal(wb, VP9_SYNC_CODE_0, 8);
1157   vpx_wb_write_literal(wb, VP9_SYNC_CODE_1, 8);
1158   vpx_wb_write_literal(wb, VP9_SYNC_CODE_2, 8);
1159 }
1160 
// Writes the bitstream profile. PROFILE_0, PROFILE_1 and PROFILE_2 are coded
// as the 2-bit literals 0, 2 and 1 respectively; any other value is asserted
// to be PROFILE_3 and coded as the 3-bit literal 6.
static void write_profile(BITSTREAM_PROFILE profile,
                          struct vpx_write_bit_buffer *wb) {
  int literal;
  int num_bits = 2;

  switch (profile) {
    case PROFILE_0: literal = 0; break;
    case PROFILE_1: literal = 2; break;
    case PROFILE_2: literal = 1; break;
    default:
      assert(profile == PROFILE_3);
      literal = 6;
      num_bits = 3;
      break;
  }

  vpx_wb_write_literal(wb, literal, num_bits);
}
1173 
// Writes bit depth, color space and chroma subsampling information.
//
// For PROFILE_2 and above one bit selects the bit depth (0 for VPX_BITS_10,
// 1 otherwise). The 3-bit color space follows. For non-sRGB content the color
// range bit is written and, for PROFILE_1 / PROFILE_3, the two subsampling
// bits plus one unused 0 bit. For sRGB only one unused 0 bit is written.
static void write_bitdepth_colorspace_sampling(
    VP9_COMMON *const cm, struct vpx_write_bit_buffer *wb) {
  const int explicit_subsampling =
      cm->profile == PROFILE_1 || cm->profile == PROFILE_3;

  if (cm->profile >= PROFILE_2) {
    assert(cm->bit_depth > VPX_BITS_8);
    vpx_wb_write_bit(wb, cm->bit_depth != VPX_BITS_10);
  }

  vpx_wb_write_literal(wb, cm->color_space, 3);

  if (cm->color_space == VPX_CS_SRGB) {
    assert(explicit_subsampling);
    vpx_wb_write_bit(wb, 0);  // unused
    return;
  }

  // 0: [16, 235] (i.e. xvYCC), 1: [0, 255]
  vpx_wb_write_bit(wb, cm->color_range);

  if (!explicit_subsampling) {
    // Nothing is coded here; both subsampling flags are asserted to be 1.
    assert(cm->subsampling_x == 1 && cm->subsampling_y == 1);
    return;
  }

  assert(cm->subsampling_x != 1 || cm->subsampling_y != 1);
  vpx_wb_write_bit(wb, cm->subsampling_x);
  vpx_wb_write_bit(wb, cm->subsampling_y);
  vpx_wb_write_bit(wb, 0);  // unused
}
1197 
// Writes the uncompressed frame header to the bit buffer.
//
// Order: frame marker, profile and the show_existing_frame flag. For a
// show-existing frame only the 3-bit alt_fb_idx follows and the function
// returns. Otherwise it writes frame type, show_frame and error resilience
// flags, then the key-frame / intra-only / inter specific fields, the frame
// context controls, loop filter, quantization, segmentation and tile info.
//
// Side effect: for inter frames fix_interp_filter() may replace
// cm->interp_filter before it is written.
static void write_uncompressed_header(VP9_COMP *cpi,
                                      struct vpx_write_bit_buffer *wb) {
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;

  vpx_wb_write_literal(wb, VP9_FRAME_MARKER, 2);
  write_profile(cm->profile, wb);

  // A show-existing frame only names the buffer to be shown.
  vpx_wb_write_bit(wb, cm->show_existing_frame);
  if (cm->show_existing_frame) {
    vpx_wb_write_literal(wb, cpi->alt_fb_idx, 3);
    return;
  }

  vpx_wb_write_bit(wb, cm->frame_type);
  vpx_wb_write_bit(wb, cm->show_frame);
  vpx_wb_write_bit(wb, cm->error_resilient_mode);

  if (cm->frame_type == KEY_FRAME) {
    write_sync_code(wb);
    write_bitdepth_colorspace_sampling(cm, wb);
    write_frame_size(cm, wb);
  } else {
    // intra_only is only coded for frames that are not shown.
    if (!cm->show_frame) vpx_wb_write_bit(wb, cm->intra_only);

    if (!cm->error_resilient_mode)
      vpx_wb_write_literal(wb, cm->reset_frame_context, 2);

    if (!cm->intra_only) {
      MV_REFERENCE_FRAME ref_frame;

      vpx_wb_write_literal(wb, vp9_get_refresh_mask(cpi), REF_FRAMES);
      for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
        const int map_idx = get_ref_frame_map_idx(cpi, ref_frame);
        assert(map_idx != INVALID_IDX);
        vpx_wb_write_literal(wb, map_idx, REF_FRAMES_LOG2);
        vpx_wb_write_bit(wb, cm->ref_frame_sign_bias[ref_frame]);
      }

      write_frame_size_with_refs(cpi, wb);

      vpx_wb_write_bit(wb, cm->allow_high_precision_mv);

      fix_interp_filter(cm, cpi->td.counts);
      write_interp_filter(cm->interp_filter, wb);
    } else {
      write_sync_code(wb);

      // Note for profile 0, 420 8bpp is assumed.
      if (cm->profile > PROFILE_0) write_bitdepth_colorspace_sampling(cm, wb);

      vpx_wb_write_literal(wb, vp9_get_refresh_mask(cpi), REF_FRAMES);
      write_frame_size(cm, wb);
    }
  }

  if (!cm->error_resilient_mode) {
    vpx_wb_write_bit(wb, cm->refresh_frame_context);
    vpx_wb_write_bit(wb, cm->frame_parallel_decoding_mode);
  }

  vpx_wb_write_literal(wb, cm->frame_context_idx, FRAME_CONTEXTS_LOG2);

  encode_loopfilter(&cm->lf, wb);
  encode_quantization(cm, wb);
  encode_segmentation(cm, xd, wb);

  write_tile_info(cm, wb);
}
1270 
// Writes the compressed header into data with a vpx_writer and returns its
// size in bytes.
//
// Contents, in order: transform mode/probabilities (skipped for lossless,
// where cm->tx_mode is forced to ONLY_4X4 instead), coefficient probability
// updates and skip probability updates. For frames that are not intra-only
// these are followed by the inter mode, switchable interpolation filter,
// intra/inter, reference mode, single/compound reference, y mode, partition
// and motion vector probability updates.
//
// The resulting size is asserted to fit in 16 bits.
static size_t write_compressed_header(VP9_COMP *cpi, uint8_t *data) {
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
  FRAME_CONTEXT *const fc = cm->fc;
  FRAME_COUNTS *counts = cpi->td.counts;
  vpx_writer header_bc;

  vpx_start_encode(&header_bc, data);

  if (!xd->lossless) {
    encode_txfm_probs(cm, &header_bc, counts);
  } else {
    cm->tx_mode = ONLY_4X4;
  }

  update_coef_probs(cpi, &header_bc);
  update_skip_probs(cm, &header_bc, counts);

  if (!frame_is_intra_only(cm)) {
    int ctx;

    for (ctx = 0; ctx < INTER_MODE_CONTEXTS; ++ctx) {
      prob_diff_update(vp9_inter_mode_tree, fc->inter_mode_probs[ctx],
                       counts->inter_mode[ctx], INTER_MODES, &header_bc);
    }

    if (cm->interp_filter == SWITCHABLE)
      update_switchable_interp_probs(cm, &header_bc, counts);

    for (ctx = 0; ctx < INTRA_INTER_CONTEXTS; ++ctx) {
      vp9_cond_prob_diff_update(&header_bc, &fc->intra_inter_prob[ctx],
                                counts->intra_inter[ctx]);
    }

    // Reference mode is only signalled when compound prediction is allowed.
    if (cpi->allow_comp_inter_inter) {
      const int use_compound_pred = cm->reference_mode != SINGLE_REFERENCE;
      const int use_hybrid_pred = cm->reference_mode == REFERENCE_MODE_SELECT;

      vpx_write_bit(&header_bc, use_compound_pred);
      if (use_compound_pred) {
        vpx_write_bit(&header_bc, use_hybrid_pred);
        if (use_hybrid_pred) {
          for (ctx = 0; ctx < COMP_INTER_CONTEXTS; ++ctx) {
            vp9_cond_prob_diff_update(&header_bc, &fc->comp_inter_prob[ctx],
                                      counts->comp_inter[ctx]);
          }
        }
      }
    }

    if (cm->reference_mode != COMPOUND_REFERENCE) {
      for (ctx = 0; ctx < REF_CONTEXTS; ++ctx) {
        vp9_cond_prob_diff_update(&header_bc, &fc->single_ref_prob[ctx][0],
                                  counts->single_ref[ctx][0]);
        vp9_cond_prob_diff_update(&header_bc, &fc->single_ref_prob[ctx][1],
                                  counts->single_ref[ctx][1]);
      }
    }

    if (cm->reference_mode != SINGLE_REFERENCE) {
      for (ctx = 0; ctx < REF_CONTEXTS; ++ctx) {
        vp9_cond_prob_diff_update(&header_bc, &fc->comp_ref_prob[ctx],
                                  counts->comp_ref[ctx]);
      }
    }

    for (ctx = 0; ctx < BLOCK_SIZE_GROUPS; ++ctx) {
      prob_diff_update(vp9_intra_mode_tree, fc->y_mode_prob[ctx],
                       counts->y_mode[ctx], INTRA_MODES, &header_bc);
    }

    for (ctx = 0; ctx < PARTITION_CONTEXTS; ++ctx) {
      prob_diff_update(vp9_partition_tree, fc->partition_prob[ctx],
                       counts->partition[ctx], PARTITION_TYPES, &header_bc);
    }

    vp9_write_nmv_probs(cm, cm->allow_high_precision_mv, &header_bc,
                        &counts->mv);
  }

  vpx_stop_encode(&header_bc);
  assert(header_bc.pos <= 0xffff);

  return header_bc.pos;
}
1347 
// Packs a complete frame into dest and stores the number of bytes produced
// in *size.
//
// The frame consists of the uncompressed header, a 16-bit field holding the
// compressed header size (back-patched once that size is known), the
// compressed header and the tile data. A show-existing frame consists of the
// uncompressed header only.
//
// cpi:  encoder instance describing the frame to pack.
// dest: output buffer; assumed by this function to be large enough.
// size: receives the total number of bytes written to dest.
void vp9_pack_bitstream(VP9_COMP *cpi, uint8_t *dest, size_t *size) {
  uint8_t *data = dest;
  size_t first_part_size, uncompressed_hdr_size;
  struct vpx_write_bit_buffer wb = { data, 0 };
  struct vpx_write_bit_buffer saved_wb;

  write_uncompressed_header(cpi, &wb);

  // Skip the rest of the coding process for a show-existing frame, but still
  // report the header bytes that were written so the caller never sees a
  // stale or uninitialized size.
  if (cpi->common.show_existing_frame) {
    *size = vpx_wb_bytes_written(&wb);
    return;
  }

  // The compressed header size is not known yet: reserve 16 bits and remember
  // where they are so they can be filled in afterwards.
  saved_wb = wb;
  vpx_wb_write_literal(&wb, 0, 16);

  uncompressed_hdr_size = vpx_wb_bytes_written(&wb);
  data += uncompressed_hdr_size;

  vpx_clear_system_state();

  first_part_size = write_compressed_header(cpi, data);
  data += first_part_size;
  // TODO(jbb): Figure out what to do if first_part_size > 16 bits.
  vpx_wb_write_literal(&saved_wb, (int)first_part_size, 16);

  data += encode_tiles(cpi, data);

  *size = data - dest;
}
1376