1 /*
2  * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3  *
4  * This source code is subject to the terms of the BSD 2 Clause License and
5  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6  * was not distributed with this source code in the LICENSE file, you can
7  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8  * Media Patent License 1.0 was not distributed with this source code in the
9  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10  */
11 
12 #include <math.h>
13 
14 #include "config/aom_config.h"
15 #include "config/aom_dsp_rtcd.h"
16 #include "config/av1_rtcd.h"
17 
18 #include "aom_dsp/aom_dsp_common.h"
19 #include "aom_mem/aom_mem.h"
20 #include "aom_ports/aom_once.h"
21 #include "aom_ports/mem.h"
22 #include "aom_ports/system_state.h"
23 #include "av1/common/reconintra.h"
24 #include "av1/common/onyxc_int.h"
25 #include "av1/common/cfl.h"
26 
// Bit flags naming the neighboring edges an intra prediction mode depends on.
// They are OR-ed together per mode in extend_modes[] below. The lowest flag
// occupies bit 1; bit 0 is not assigned in this enum.
enum {
  NEED_LEFT = 1 << 1,
  NEED_ABOVE = 1 << 2,
  NEED_ABOVERIGHT = 1 << 3,
  NEED_ABOVELEFT = 1 << 4,
  NEED_BOTTOMLEFT = 1 << 5,
};
34 
// Constants for intra edge filtering / upsampling. Their users are not in this
// part of the file.
// NOTE(review): presumably the number of edge filter kernels, the taps per
// kernel, and the largest edge length that may be upsampled -- confirm against
// the code that uses them.
#define INTRA_EDGE_FILT 3
#define INTRA_EDGE_TAPS 5
#define MAX_UPSAMPLE_SZ 16
38 
// Per-mode bitmask of NEED_* flags, with one entry for each of the INTRA_MODES
// intra prediction modes. The trailing comment on each row names the mode the
// row is intended for.
static const uint8_t extend_modes[INTRA_MODES] = {
  NEED_ABOVE | NEED_LEFT,                   // DC
  NEED_ABOVE,                               // V
  NEED_LEFT,                                // H
  NEED_ABOVE | NEED_ABOVERIGHT,             // D45
  NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT,  // D135
  NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT,  // D113
  NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT,  // D157
  NEED_LEFT | NEED_BOTTOMLEFT,              // D203
  NEED_ABOVE | NEED_ABOVERIGHT,             // D67
  NEED_LEFT | NEED_ABOVE,                   // SMOOTH
  NEED_LEFT | NEED_ABOVE,                   // SMOOTH_V
  NEED_LEFT | NEED_ABOVE,                   // SMOOTH_H
  NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT,  // PAETH
};
54 
// Tables to store if the top-right reference pixels are available. The flags
// are represented with bits, packed into 8-bit integers. E.g., for the 32x32
// blocks in a 128x128 superblock, the index of the "o" block is 10 (in raster
// order), so its flag is stored at the 3rd bit of the 2nd entry in the table,
// i.e. (table[10 / 8] >> (10 % 8)) & 1.
//       . . . .
//       . . . .
//       . . o .
//       . . . .
//
// The tables are lookup data only and are never written, so they are declared
// const; they are reached through the const pointers in has_tr_tables[].
static const uint8_t has_tr_4x4[128] = {
  255, 255, 255, 255, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  127, 127, 127, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  255, 127, 255, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  127, 127, 127, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  255, 255, 255, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  127, 127, 127, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  255, 127, 255, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  127, 127, 127, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
};
static const uint8_t has_tr_4x8[64] = {
  255, 255, 255, 255, 119, 119, 119, 119, 127, 127, 127, 127, 119,
  119, 119, 119, 255, 127, 255, 127, 119, 119, 119, 119, 127, 127,
  127, 127, 119, 119, 119, 119, 255, 255, 255, 127, 119, 119, 119,
  119, 127, 127, 127, 127, 119, 119, 119, 119, 255, 127, 255, 127,
  119, 119, 119, 119, 127, 127, 127, 127, 119, 119, 119, 119,
};
static const uint8_t has_tr_8x4[64] = {
  255, 255, 0, 0, 85, 85, 0, 0, 119, 119, 0, 0, 85, 85, 0, 0,
  127, 127, 0, 0, 85, 85, 0, 0, 119, 119, 0, 0, 85, 85, 0, 0,
  255, 127, 0, 0, 85, 85, 0, 0, 119, 119, 0, 0, 85, 85, 0, 0,
  127, 127, 0, 0, 85, 85, 0, 0, 119, 119, 0, 0, 85, 85, 0, 0,
};
static const uint8_t has_tr_8x8[32] = {
  255, 255, 85, 85, 119, 119, 85, 85, 127, 127, 85, 85, 119, 119, 85, 85,
  255, 127, 85, 85, 119, 119, 85, 85, 127, 127, 85, 85, 119, 119, 85, 85,
};
static const uint8_t has_tr_8x16[16] = {
  255, 255, 119, 119, 127, 127, 119, 119,
  255, 127, 119, 119, 127, 127, 119, 119,
};
static const uint8_t has_tr_16x8[16] = {
  255, 0, 85, 0, 119, 0, 85, 0, 127, 0, 85, 0, 119, 0, 85, 0,
};
static const uint8_t has_tr_16x16[8] = {
  255, 85, 119, 85, 127, 85, 119, 85,
};
static const uint8_t has_tr_16x32[4] = { 255, 119, 127, 119 };
static const uint8_t has_tr_32x16[4] = { 15, 5, 7, 5 };
static const uint8_t has_tr_32x32[2] = { 95, 87 };
static const uint8_t has_tr_32x64[1] = { 127 };
static const uint8_t has_tr_64x32[1] = { 19 };
static const uint8_t has_tr_64x64[1] = { 7 };
static const uint8_t has_tr_64x128[1] = { 3 };
static const uint8_t has_tr_128x64[1] = { 1 };
static const uint8_t has_tr_128x128[1] = { 1 };
static const uint8_t has_tr_4x16[32] = {
  255, 255, 255, 255, 127, 127, 127, 127, 255, 127, 255,
  127, 127, 127, 127, 127, 255, 255, 255, 127, 127, 127,
  127, 127, 255, 127, 255, 127, 127, 127, 127, 127,
};
static const uint8_t has_tr_16x4[32] = {
  255, 0, 0, 0, 85, 0, 0, 0, 119, 0, 0, 0, 85, 0, 0, 0,
  127, 0, 0, 0, 85, 0, 0, 0, 119, 0, 0, 0, 85, 0, 0, 0,
};
static const uint8_t has_tr_8x32[8] = {
  255, 255, 127, 127, 255, 127, 127, 127,
};
static const uint8_t has_tr_32x8[8] = {
  15, 0, 5, 0, 7, 0, 5, 0,
};
static const uint8_t has_tr_16x64[2] = { 255, 127 };
static const uint8_t has_tr_64x16[2] = { 3, 1 };
127 
// Maps a block size to its top-right availability bitmap. Indexed by
// BLOCK_SIZE (see get_has_tr_table()); the comment above each row lists the
// block sizes that row's entries are for.
static const uint8_t *const has_tr_tables[BLOCK_SIZES_ALL] = {
  // 4X4
  has_tr_4x4,
  // 4X8,       8X4,            8X8
  has_tr_4x8, has_tr_8x4, has_tr_8x8,
  // 8X16,      16X8,           16X16
  has_tr_8x16, has_tr_16x8, has_tr_16x16,
  // 16X32,     32X16,          32X32
  has_tr_16x32, has_tr_32x16, has_tr_32x32,
  // 32X64,     64X32,          64X64
  has_tr_32x64, has_tr_64x32, has_tr_64x64,
  // 64x128,    128x64,         128x128
  has_tr_64x128, has_tr_128x64, has_tr_128x128,
  // 4x16,      16x4,            8x32
  has_tr_4x16, has_tr_16x4, has_tr_8x32,
  // 32x8,      16x64,           64x16
  has_tr_32x8, has_tr_16x64, has_tr_64x16
};
146 
// Top-right availability bitmaps for square blocks that belong to a
// PARTITION_VERT_A / PARTITION_VERT_B partition; they are selected through
// has_tr_vert_tables[]. The data is never written, so it is declared const.
static const uint8_t has_tr_vert_8x8[32] = {
  255, 255, 0, 0, 119, 119, 0, 0, 127, 127, 0, 0, 119, 119, 0, 0,
  255, 127, 0, 0, 119, 119, 0, 0, 127, 127, 0, 0, 119, 119, 0, 0,
};
static const uint8_t has_tr_vert_16x16[8] = {
  255, 0, 119, 0, 127, 0, 119, 0,
};
static const uint8_t has_tr_vert_32x32[2] = { 15, 7 };
static const uint8_t has_tr_vert_64x64[1] = { 3 };
156 
// The _vert_* tables are like the ordinary tables above, but describe the
// order we visit square blocks when doing a PARTITION_VERT_A or
// PARTITION_VERT_B. This is the same order as normal except for on the last
// split where we go vertically (TL, BL, TR, BR). We treat the rectangular block
// as a pair of squares, which means that these tables work correctly for both
// mixed vertical partition types.
//
// There are tables for each of the square sizes. Vertical rectangles (like
// BLOCK_16X32) use their respective "non-vert" table.
//
// The array has BLOCK_SIZES entries and is indexed by block size. NULL marks a
// block size for which no table is provided; get_has_tr_table() asserts that
// the entry it selects is non-NULL.
static const uint8_t *const has_tr_vert_tables[BLOCK_SIZES] = {
  // 4X4
  NULL,
  // 4X8,      8X4,         8X8
  has_tr_4x8, NULL, has_tr_vert_8x8,
  // 8X16,     16X8,        16X16
  has_tr_8x16, NULL, has_tr_vert_16x16,
  // 16X32,    32X16,       32X32
  has_tr_16x32, NULL, has_tr_vert_32x32,
  // 32X64,    64X32,       64X64
  has_tr_32x64, NULL, has_tr_vert_64x64,
  // 64x128,   128x64,      128x128
  has_tr_64x128, NULL, has_tr_128x128
};
180 
// Returns the top-right availability bitmap for a block of size |bsize| coded
// under |partition|. PARTITION_VERT_A and PARTITION_VERT_B select from
// has_tr_vert_tables[] (which only has BLOCK_SIZES entries); every other
// partition type selects from has_tr_tables[]. The chosen entry is asserted to
// be non-NULL.
static const uint8_t *get_has_tr_table(PARTITION_TYPE partition,
                                       BLOCK_SIZE bsize) {
  const int is_mixed_vert =
      (partition == PARTITION_VERT_A) || (partition == PARTITION_VERT_B);
  // The vertical table is shorter than the regular one, so guard the index.
  assert(!is_mixed_vert || bsize < BLOCK_SIZES);
  const uint8_t *const table =
      is_mixed_vert ? has_tr_vert_tables[bsize] : has_tr_tables[bsize];
  assert(table);
  return table;
}
194 
// Returns 1 if the pixels above and to the right of the transform block at
// (row_off, col_off) inside the block at (mi_row, mi_col) can be used as
// reference, 0 otherwise.
//
// top_available / right_available: both must be nonzero, else the result is 0.
// partition, bsize: select the coding-order bitmap for the general case.
// txsz: transform size; its width in units is the number of top-right units
//       that are needed.
// ss_x, ss_y: shifts applied to block dimensions for the current plane.
static int has_top_right(const AV1_COMMON *cm, BLOCK_SIZE bsize, int mi_row,
                         int mi_col, int top_available, int right_available,
                         PARTITION_TYPE partition, TX_SIZE txsz, int row_off,
                         int col_off, int ss_x, int ss_y) {
  if (!(top_available && right_available)) return 0;

  const int blk_w_units = block_size_wide[bsize] >> tx_size_wide_log2[0];
  const int plane_w_units = AOMMAX(blk_w_units >> ss_x, 1);
  const int tr_units = tx_size_wide_unit[txsz];

  if (row_off > 0) {
    // Not in the first row of the block: the pixels are available only if
    // they still lie inside the current block (or its current 64-wide part).
    if (block_size_wide[bsize] <= block_size_wide[BLOCK_64X64]) {
      return col_off + tr_units < plane_w_units;
    }

    const int w64_units = mi_size_wide[BLOCK_64X64] >> ss_x;
    const int h64_units = mi_size_high[BLOCK_64X64] >> ss_y;
    // Special case: For 128x128 blocks, the transform unit whose top-right
    // corner is at the center of the block does in fact have pixels
    // available at its top-right corner.
    if (row_off == h64_units && col_off + tr_units == w64_units) {
      return 1;
    }
    return (col_off % w64_units) + tr_units < w64_units;
  }

  // First row of the block. If the needed pixels do not extend past the
  // block's right edge they are all in the block above, which is available.
  if (col_off + tr_units < plane_w_units) return 1;

  const int w_log2 = mi_size_wide_log2[bsize];
  const int h_log2 = mi_size_high_log2[bsize];
  const int sb_mi = mi_size_high[cm->seq_params.sb_size];
  const int sb_mask = sb_mi - 1;
  const int sb_blk_row = (mi_row & sb_mask) >> h_log2;
  const int sb_blk_col = (mi_col & sb_mask) >> w_log2;

  // Top row of superblock: the top-right pixels are in the top and/or
  // top-right superblocks, both of which are already available.
  if (sb_blk_row == 0) return 1;

  // Rightmost column of superblock (and not the top row): the top-right
  // pixels fall in the right superblock, which is not available yet.
  if (((sb_blk_col + 1) << w_log2) >= sb_mi) return 0;

  // General case: look up in the coding-order bitmap whether the top-right
  // block is coded before the current block.
  const int blk_index =
      (sb_blk_row << (MAX_MIB_SIZE_LOG2 - w_log2)) + sb_blk_col;
  const uint8_t *const tr_bitmap = get_has_tr_table(partition, bsize);
  return (tr_bitmap[blk_index / 8] >> (blk_index % 8)) & 1;
}
250 
// Similar to the has_tr_* tables, but store if the bottom-left reference
// pixels are available.
//
// The tables are lookup data only and are never written, so they are declared
// const; they are reached through the const pointers in has_bl_tables[].
static const uint8_t has_bl_4x4[128] = {
  84, 85, 85, 85, 16, 17, 17, 17, 84, 85, 85, 85, 0,  1,  1,  1,  84, 85, 85,
  85, 16, 17, 17, 17, 84, 85, 85, 85, 0,  0,  1,  0,  84, 85, 85, 85, 16, 17,
  17, 17, 84, 85, 85, 85, 0,  1,  1,  1,  84, 85, 85, 85, 16, 17, 17, 17, 84,
  85, 85, 85, 0,  0,  0,  0,  84, 85, 85, 85, 16, 17, 17, 17, 84, 85, 85, 85,
  0,  1,  1,  1,  84, 85, 85, 85, 16, 17, 17, 17, 84, 85, 85, 85, 0,  0,  1,
  0,  84, 85, 85, 85, 16, 17, 17, 17, 84, 85, 85, 85, 0,  1,  1,  1,  84, 85,
  85, 85, 16, 17, 17, 17, 84, 85, 85, 85, 0,  0,  0,  0,
};
static const uint8_t has_bl_4x8[64] = {
  16, 17, 17, 17, 0, 1, 1, 1, 16, 17, 17, 17, 0, 0, 1, 0,
  16, 17, 17, 17, 0, 1, 1, 1, 16, 17, 17, 17, 0, 0, 0, 0,
  16, 17, 17, 17, 0, 1, 1, 1, 16, 17, 17, 17, 0, 0, 1, 0,
  16, 17, 17, 17, 0, 1, 1, 1, 16, 17, 17, 17, 0, 0, 0, 0,
};
static const uint8_t has_bl_8x4[64] = {
  254, 255, 84, 85, 254, 255, 16, 17, 254, 255, 84, 85, 254, 255, 0, 1,
  254, 255, 84, 85, 254, 255, 16, 17, 254, 255, 84, 85, 254, 255, 0, 0,
  254, 255, 84, 85, 254, 255, 16, 17, 254, 255, 84, 85, 254, 255, 0, 1,
  254, 255, 84, 85, 254, 255, 16, 17, 254, 255, 84, 85, 254, 255, 0, 0,
};
static const uint8_t has_bl_8x8[32] = {
  84, 85, 16, 17, 84, 85, 0, 1, 84, 85, 16, 17, 84, 85, 0, 0,
  84, 85, 16, 17, 84, 85, 0, 1, 84, 85, 16, 17, 84, 85, 0, 0,
};
static const uint8_t has_bl_8x16[16] = {
  16, 17, 0, 1, 16, 17, 0, 0, 16, 17, 0, 1, 16, 17, 0, 0,
};
static const uint8_t has_bl_16x8[16] = {
  254, 84, 254, 16, 254, 84, 254, 0, 254, 84, 254, 16, 254, 84, 254, 0,
};
static const uint8_t has_bl_16x16[8] = {
  84, 16, 84, 0, 84, 16, 84, 0,
};
static const uint8_t has_bl_16x32[4] = { 16, 0, 16, 0 };
static const uint8_t has_bl_32x16[4] = { 78, 14, 78, 14 };
static const uint8_t has_bl_32x32[2] = { 4, 4 };
static const uint8_t has_bl_32x64[1] = { 0 };
static const uint8_t has_bl_64x32[1] = { 34 };
static const uint8_t has_bl_64x64[1] = { 0 };
static const uint8_t has_bl_64x128[1] = { 0 };
static const uint8_t has_bl_128x64[1] = { 0 };
static const uint8_t has_bl_128x128[1] = { 0 };
static const uint8_t has_bl_4x16[32] = {
  0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0,
  0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0,
};
static const uint8_t has_bl_16x4[32] = {
  254, 254, 254, 84, 254, 254, 254, 16, 254, 254, 254, 84, 254, 254, 254, 0,
  254, 254, 254, 84, 254, 254, 254, 16, 254, 254, 254, 84, 254, 254, 254, 0,
};
static const uint8_t has_bl_8x32[8] = {
  0, 1, 0, 0, 0, 1, 0, 0,
};
static const uint8_t has_bl_32x8[8] = {
  238, 78, 238, 14, 238, 78, 238, 14,
};
static const uint8_t has_bl_16x64[2] = { 0, 0 };
static const uint8_t has_bl_64x16[2] = { 42, 42 };
312 
// Maps a block size to its bottom-left availability bitmap. Indexed by
// BLOCK_SIZE (see get_has_bl_table()); the comment above each row lists the
// block sizes that row's entries are for.
static const uint8_t *const has_bl_tables[BLOCK_SIZES_ALL] = {
  // 4X4
  has_bl_4x4,
  // 4X8,         8X4,         8X8
  has_bl_4x8, has_bl_8x4, has_bl_8x8,
  // 8X16,        16X8,        16X16
  has_bl_8x16, has_bl_16x8, has_bl_16x16,
  // 16X32,       32X16,       32X32
  has_bl_16x32, has_bl_32x16, has_bl_32x32,
  // 32X64,       64X32,       64X64
  has_bl_32x64, has_bl_64x32, has_bl_64x64,
  // 64x128,      128x64,      128x128
  has_bl_64x128, has_bl_128x64, has_bl_128x128,
  // 4x16,        16x4,        8x32
  has_bl_4x16, has_bl_16x4, has_bl_8x32,
  // 32x8,        16x64,       64x16
  has_bl_32x8, has_bl_16x64, has_bl_64x16
};
331 
// Bottom-left availability bitmaps for square blocks that belong to a
// PARTITION_VERT_A / PARTITION_VERT_B partition; they are selected through
// has_bl_vert_tables[]. The data is never written, so it is declared const.
static const uint8_t has_bl_vert_8x8[32] = {
  254, 255, 16, 17, 254, 255, 0, 1, 254, 255, 16, 17, 254, 255, 0, 0,
  254, 255, 16, 17, 254, 255, 0, 1, 254, 255, 16, 17, 254, 255, 0, 0,
};
static const uint8_t has_bl_vert_16x16[8] = {
  254, 16, 254, 0, 254, 16, 254, 0,
};
static const uint8_t has_bl_vert_32x32[2] = { 14, 14 };
static const uint8_t has_bl_vert_64x64[1] = { 2 };
341 
// The _vert_* tables are like the ordinary tables above, but describe the
// order we visit square blocks when doing a PARTITION_VERT_A or
// PARTITION_VERT_B. This is the same order as normal except for on the last
// split where we go vertically (TL, BL, TR, BR). We treat the rectangular block
// as a pair of squares, which means that these tables work correctly for both
// mixed vertical partition types.
//
// There are tables for each of the square sizes. Vertical rectangles (like
// BLOCK_16X32) use their respective "non-vert" table.
//
// The array has BLOCK_SIZES entries and is indexed by block size. NULL marks a
// block size for which no table is provided; get_has_bl_table() asserts that
// the entry it selects is non-NULL.
static const uint8_t *const has_bl_vert_tables[BLOCK_SIZES] = {
  // 4X4
  NULL,
  // 4X8,     8X4,         8X8
  has_bl_4x8, NULL, has_bl_vert_8x8,
  // 8X16,    16X8,        16X16
  has_bl_8x16, NULL, has_bl_vert_16x16,
  // 16X32,   32X16,       32X32
  has_bl_16x32, NULL, has_bl_vert_32x32,
  // 32X64,   64X32,       64X64
  has_bl_32x64, NULL, has_bl_vert_64x64,
  // 64x128,  128x64,      128x128
  has_bl_64x128, NULL, has_bl_128x128
};
365 
// Returns the bottom-left availability bitmap for a block of size |bsize|
// coded under |partition|. PARTITION_VERT_A and PARTITION_VERT_B select from
// has_bl_vert_tables[] (which only has BLOCK_SIZES entries); every other
// partition type selects from has_bl_tables[]. The chosen entry is asserted to
// be non-NULL.
static const uint8_t *get_has_bl_table(PARTITION_TYPE partition,
                                       BLOCK_SIZE bsize) {
  const int is_mixed_vert =
      (partition == PARTITION_VERT_A) || (partition == PARTITION_VERT_B);
  // The vertical table is shorter than the regular one, so guard the index.
  assert(!is_mixed_vert || bsize < BLOCK_SIZES);
  const uint8_t *const table =
      is_mixed_vert ? has_bl_vert_tables[bsize] : has_bl_tables[bsize];
  assert(table);
  return table;
}
379 
// Returns 1 if the pixels below and to the left of the transform block at
// (row_off, col_off) inside the block at (mi_row, mi_col) can be used as
// reference, 0 otherwise.
//
// bottom_available / left_available: both must be nonzero, else the result
//                                    is 0.
// partition, bsize: select the coding-order bitmap for the general case.
// txsz: transform size; its height in units is the number of bottom-left units
//       that are needed.
// ss_x, ss_y: shifts applied to block dimensions for the current plane.
static int has_bottom_left(const AV1_COMMON *cm, BLOCK_SIZE bsize, int mi_row,
                           int mi_col, int bottom_available, int left_available,
                           PARTITION_TYPE partition, TX_SIZE txsz, int row_off,
                           int col_off, int ss_x, int ss_y) {
  if (!(bottom_available && left_available)) return 0;

  if (col_off > 0) {
    // Special case for 128x* blocks, when col_off is half the block width.
    // This is needed because 128x* superblocks are divided into 64x* blocks
    // in raster order.
    if (block_size_wide[bsize] > block_size_wide[BLOCK_64X64]) {
      const int w64_units = mi_size_wide[BLOCK_64X64] >> ss_x;
      if (col_off % w64_units == 0) {
        // Left edge of the top-right or bottom-right 64x* block: available
        // when all bottom-left pixels are in the left 64x* block, which is
        // already coded.
        const int h64_units = mi_size_high[BLOCK_64X64] >> ss_y;
        const int limit_units =
            AOMMIN(mi_size_high[bsize] >> ss_y, h64_units);
        return (row_off % h64_units) + tx_size_high_unit[txsz] < limit_units;
      }
    }
    // Otherwise the bottom-left pixels are in the bottom-left block, which is
    // not available.
    return 0;
  }

  const int blk_h_units = block_size_high[bsize] >> tx_size_high_log2[0];
  const int plane_h_units = AOMMAX(blk_h_units >> ss_y, 1);
  const int bl_units = tx_size_high_unit[txsz];

  // All bottom-left pixels are in the left block, which is already available.
  if (row_off + bl_units < plane_h_units) return 1;

  const int w_log2 = mi_size_wide_log2[bsize];
  const int h_log2 = mi_size_high_log2[bsize];
  const int sb_mi = mi_size_high[cm->seq_params.sb_size];
  const int sb_mask = sb_mi - 1;
  const int sb_blk_row = (mi_row & sb_mask) >> h_log2;
  const int sb_blk_col = (mi_col & sb_mask) >> w_log2;

  // Leftmost column of superblock: the bottom-left pixels may be in the left
  // and/or bottom-left superblocks. Only the left superblock is available, so
  // check that all required pixels fall in that superblock.
  if (sb_blk_col == 0) {
    const int blk_top_units =
        (sb_blk_row << (h_log2 + MI_SIZE_LOG2 - tx_size_wide_log2[0])) >> ss_y;
    const int sb_h_units = sb_mi >> ss_y;
    return blk_top_units + row_off + bl_units < sb_h_units;
  }

  // Bottom row of superblock (and not the leftmost column): the bottom-left
  // pixels fall in the bottom superblock, which is not available yet.
  if (((sb_blk_row + 1) << h_log2) >= sb_mi) return 0;

  // General case: look up in the coding-order bitmap whether the bottom-left
  // block is coded before the current block.
  const int blk_index =
      (sb_blk_row << (MAX_MIB_SIZE_LOG2 - w_log2)) + sb_blk_col;
  const uint8_t *const bl_bitmap = get_has_bl_table(partition, bsize);
  return (bl_bitmap[blk_index / 8] >> (blk_index % 8)) & 1;
}
449 
// Function pointer type of the 8-bit intra predictors: destination buffer and
// stride, plus pointers to the above and left edge pixels.
typedef void (*intra_pred_fn)(uint8_t *dst, ptrdiff_t stride,
                              const uint8_t *above, const uint8_t *left);

// 8-bit predictor dispatch tables, filled in by
// init_intra_predictors_internal(): pred[] is indexed by [mode][tx_size];
// dc_pred[][] holds the DC variants (dc_128 at [0][0], dc_top at [0][1],
// dc_left at [1][0], dc at [1][1]), each indexed by tx_size.
static intra_pred_fn pred[INTRA_MODES][TX_SIZES_ALL];
static intra_pred_fn dc_pred[2][2][TX_SIZES_ALL];

// High bit-depth counterpart of intra_pred_fn: 16-bit samples and an extra
// bit-depth argument.
typedef void (*intra_high_pred_fn)(uint16_t *dst, ptrdiff_t stride,
                                   const uint16_t *above, const uint16_t *left,
                                   int bd);
// High bit-depth dispatch tables, laid out like pred[] and dc_pred[][].
static intra_high_pred_fn pred_high[INTRA_MODES][TX_SIZES_ALL];
static intra_high_pred_fn dc_pred_high[2][2][TX_SIZES_ALL];
461 
init_intra_predictors_internal(void)462 static void init_intra_predictors_internal(void) {
463   assert(NELEMENTS(mode_to_angle_map) == INTRA_MODES);
464 
465 #define INIT_RECTANGULAR(p, type)             \
466   p[TX_4X8] = aom_##type##_predictor_4x8;     \
467   p[TX_8X4] = aom_##type##_predictor_8x4;     \
468   p[TX_8X16] = aom_##type##_predictor_8x16;   \
469   p[TX_16X8] = aom_##type##_predictor_16x8;   \
470   p[TX_16X32] = aom_##type##_predictor_16x32; \
471   p[TX_32X16] = aom_##type##_predictor_32x16; \
472   p[TX_32X64] = aom_##type##_predictor_32x64; \
473   p[TX_64X32] = aom_##type##_predictor_64x32; \
474   p[TX_4X16] = aom_##type##_predictor_4x16;   \
475   p[TX_16X4] = aom_##type##_predictor_16x4;   \
476   p[TX_8X32] = aom_##type##_predictor_8x32;   \
477   p[TX_32X8] = aom_##type##_predictor_32x8;   \
478   p[TX_16X64] = aom_##type##_predictor_16x64; \
479   p[TX_64X16] = aom_##type##_predictor_64x16;
480 
481 #define INIT_NO_4X4(p, type)                  \
482   p[TX_8X8] = aom_##type##_predictor_8x8;     \
483   p[TX_16X16] = aom_##type##_predictor_16x16; \
484   p[TX_32X32] = aom_##type##_predictor_32x32; \
485   p[TX_64X64] = aom_##type##_predictor_64x64; \
486   INIT_RECTANGULAR(p, type)
487 
488 #define INIT_ALL_SIZES(p, type)           \
489   p[TX_4X4] = aom_##type##_predictor_4x4; \
490   INIT_NO_4X4(p, type)
491 
492   INIT_ALL_SIZES(pred[V_PRED], v);
493   INIT_ALL_SIZES(pred[H_PRED], h);
494   INIT_ALL_SIZES(pred[PAETH_PRED], paeth);
495   INIT_ALL_SIZES(pred[SMOOTH_PRED], smooth);
496   INIT_ALL_SIZES(pred[SMOOTH_V_PRED], smooth_v);
497   INIT_ALL_SIZES(pred[SMOOTH_H_PRED], smooth_h);
498   INIT_ALL_SIZES(dc_pred[0][0], dc_128);
499   INIT_ALL_SIZES(dc_pred[0][1], dc_top);
500   INIT_ALL_SIZES(dc_pred[1][0], dc_left);
501   INIT_ALL_SIZES(dc_pred[1][1], dc);
502 
503   INIT_ALL_SIZES(pred_high[V_PRED], highbd_v);
504   INIT_ALL_SIZES(pred_high[H_PRED], highbd_h);
505   INIT_ALL_SIZES(pred_high[PAETH_PRED], highbd_paeth);
506   INIT_ALL_SIZES(pred_high[SMOOTH_PRED], highbd_smooth);
507   INIT_ALL_SIZES(pred_high[SMOOTH_V_PRED], highbd_smooth_v);
508   INIT_ALL_SIZES(pred_high[SMOOTH_H_PRED], highbd_smooth_h);
509   INIT_ALL_SIZES(dc_pred_high[0][0], highbd_dc_128);
510   INIT_ALL_SIZES(dc_pred_high[0][1], highbd_dc_top);
511   INIT_ALL_SIZES(dc_pred_high[1][0], highbd_dc_left);
512   INIT_ALL_SIZES(dc_pred_high[1][1], highbd_dc);
513 #undef intra_pred_allsizes
514 }
515 
// Directional prediction, zone 1: 0 < angle < 90
//
// Predicts a bw x bh block into dst using only the above edge. Row r reads
// the edge at position (r + 1) * dx, expressed in 1/64 sample units (1/32 when
// upsample_above is set), and blends the two nearest edge samples with a
// 5-bit weight. Positions at or past the last usable edge sample replicate
// above[max_base_x]. left and dy are unused (dy is asserted to be 1).
void av1_dr_prediction_z1_c(uint8_t *dst, ptrdiff_t stride, int bw, int bh,
                            const uint8_t *above, const uint8_t *left,
                            int upsample_above, int dx, int dy) {
  (void)left;
  (void)dy;
  assert(dy == 1);
  assert(dx > 0);

  const int last_base = ((bw + bh) - 1) << upsample_above;
  const int frac_bits = 6 - upsample_above;
  const int step = 1 << upsample_above;

  uint8_t *out_row = dst;
  int pos = dx;
  for (int row = 0; row < bh; ++row, out_row += stride, pos += dx) {
    int idx = pos >> frac_bits;

    if (idx >= last_base) {
      // This row and every following one start past the edge: flood them all
      // with the last edge sample and stop.
      for (int rest = row; rest < bh; ++rest) {
        memset(out_row, above[last_base], bw * sizeof(out_row[0]));
        out_row += stride;
      }
      return;
    }

    // Interpolation weight of the right-hand sample, in 1/32 units.
    const int w_hi = ((pos << upsample_above) & 0x3F) >> 1;
    const int w_lo = 32 - w_hi;

    for (int col = 0; col < bw; ++col, idx += step) {
      if (idx < last_base) {
        const int acc = above[idx] * w_lo + above[idx + 1] * w_hi;
        out_row[col] = ROUND_POWER_OF_TWO(acc, 5);
      } else {
        out_row[col] = above[last_base];
      }
    }
  }
}
553 
// Directional prediction, zone 2: 90 < angle < 180
//
// Predicts a bw x bh block into dst from both edges. Each pixel is first
// projected onto the above edge; if that position is left of the first usable
// above sample (index min_base_x), the pixel is projected onto the left edge
// instead. Either way the two nearest edge samples are blended with a 5-bit
// weight. Negative edge indices are read through above[] / left[].
void av1_dr_prediction_z2_c(uint8_t *dst, ptrdiff_t stride, int bw, int bh,
                            const uint8_t *above, const uint8_t *left,
                            int upsample_above, int upsample_left, int dx,
                            int dy) {
  assert(dx > 0);
  assert(dy > 0);

  const int min_base_x = -(1 << upsample_above);
  const int min_base_y = -(1 << upsample_left);
  (void)min_base_y;  // Only referenced by the assert below.
  const int frac_bits_x = 6 - upsample_above;
  const int frac_bits_y = 6 - upsample_left;

  for (int row = 0; row < bh; ++row, dst += stride) {
    for (int col = 0; col < bw; ++col) {
      const uint8_t *ref;  // First of the two edge samples to blend.
      int w_hi;            // Weight of ref[1], in 1/32 units.

      const int pos_x = (col << 6) - (row + 1) * dx;
      const int base_x = pos_x >> frac_bits_x;
      if (base_x >= min_base_x) {
        w_hi = ((pos_x * (1 << upsample_above)) & 0x3F) >> 1;
        ref = above + base_x;
      } else {
        const int pos_y = (row << 6) - (col + 1) * dy;
        const int base_y = pos_y >> frac_bits_y;
        assert(base_y >= min_base_y);
        w_hi = ((pos_y * (1 << upsample_left)) & 0x3F) >> 1;
        ref = left + base_y;
      }

      const int acc = ref[0] * (32 - w_hi) + ref[1] * w_hi;
      dst[col] = ROUND_POWER_OF_TWO(acc, 5);
    }
  }
}
592 
// Directional prediction, zone 3: 180 < angle < 270
//
// Predicts a bw x bh block into dst using only the left edge, one column at a
// time. Column c reads the edge at position (c + 1) * dy and blends the two
// nearest edge samples with a 5-bit weight; once the position reaches the last
// usable edge sample the rest of the column replicates left[max_base_y].
// above and dx are unused (dx is asserted to be 1).
void av1_dr_prediction_z3_c(uint8_t *dst, ptrdiff_t stride, int bw, int bh,
                            const uint8_t *above, const uint8_t *left,
                            int upsample_left, int dx, int dy) {
  (void)above;
  (void)dx;

  assert(dx == 1);
  assert(dy > 0);

  const int last_base = (bw + bh - 1) << upsample_left;
  const int frac_bits = 6 - upsample_left;
  const int step = 1 << upsample_left;

  int pos = dy;
  for (int col = 0; col < bw; ++col, pos += dy) {
    int idx = pos >> frac_bits;
    // Interpolation weight of the lower sample, in 1/32 units.
    const int w_hi = ((pos << upsample_left) & 0x3F) >> 1;
    const int w_lo = 32 - w_hi;

    int row = 0;
    // Interpolated part of the column.
    for (; row < bh && idx < last_base; ++row, idx += step) {
      const int acc = left[idx] * w_lo + left[idx + 1] * w_hi;
      dst[row * stride + col] = ROUND_POWER_OF_TWO(acc, 5);
    }
    // Remainder of the column lies past the edge: replicate the last sample.
    for (; row < bh; ++row) {
      dst[row * stride + col] = left[last_base];
    }
  }
}
624 
// Dispatches an 8-bit directional prediction of size tx_size by angle:
// (0, 90) -> zone 1, 90 -> vertical predictor, (90, 180) -> zone 2,
// 180 -> horizontal predictor, (180, 270) -> zone 3. Angles outside (0, 270)
// trip the assert and otherwise leave dst untouched.
static void dr_predictor(uint8_t *dst, ptrdiff_t stride, TX_SIZE tx_size,
                         const uint8_t *above, const uint8_t *left,
                         int upsample_above, int upsample_left, int angle) {
  const int dx = av1_get_dx(angle);
  const int dy = av1_get_dy(angle);
  const int bw = tx_size_wide[tx_size];
  const int bh = tx_size_high[tx_size];
  assert(angle > 0 && angle < 270);

  if (angle <= 0 || angle >= 270) return;

  if (angle < 90) {
    av1_dr_prediction_z1(dst, stride, bw, bh, above, left, upsample_above, dx,
                         dy);
  } else if (angle == 90) {
    pred[V_PRED][tx_size](dst, stride, above, left);
  } else if (angle < 180) {
    av1_dr_prediction_z2(dst, stride, bw, bh, above, left, upsample_above,
                         upsample_left, dx, dy);
  } else if (angle == 180) {
    pred[H_PRED][tx_size](dst, stride, above, left);
  } else {
    av1_dr_prediction_z3(dst, stride, bw, bh, above, left, upsample_left, dx,
                         dy);
  }
}
649 
// Directional prediction, zone 1: 0 < angle < 90
//
// High bit-depth (16-bit sample) version. Predicts a bw x bh block into dst
// using only the above edge. Row r reads the edge at position (r + 1) * dx and
// blends the two nearest edge samples with a 5-bit weight. Positions at or
// past the last usable edge sample replicate above[max_base_x]. left, dy and
// bd are unused (dy is asserted to be 1).
void av1_highbd_dr_prediction_z1_c(uint16_t *dst, ptrdiff_t stride, int bw,
                                   int bh, const uint16_t *above,
                                   const uint16_t *left, int upsample_above,
                                   int dx, int dy, int bd) {
  (void)left;
  (void)dy;
  (void)bd;
  assert(dy == 1);
  assert(dx > 0);

  const int last_base = ((bw + bh) - 1) << upsample_above;
  const int frac_bits = 6 - upsample_above;
  const int step = 1 << upsample_above;

  uint16_t *out_row = dst;
  int pos = dx;
  for (int row = 0; row < bh; ++row, out_row += stride, pos += dx) {
    int idx = pos >> frac_bits;

    if (idx >= last_base) {
      // This row and every following one start past the edge: flood them all
      // with the last edge sample and stop.
      for (int rest = row; rest < bh; ++rest) {
        aom_memset16(out_row, above[last_base], bw);
        out_row += stride;
      }
      return;
    }

    // Interpolation weight of the right-hand sample, in 1/32 units.
    const int w_hi = ((pos << upsample_above) & 0x3F) >> 1;
    const int w_lo = 32 - w_hi;

    for (int col = 0; col < bw; ++col, idx += step) {
      if (idx < last_base) {
        const int acc = above[idx] * w_lo + above[idx + 1] * w_hi;
        out_row[col] = ROUND_POWER_OF_TWO(acc, 5);
      } else {
        out_row[col] = above[last_base];
      }
    }
  }
}
689 
// Directional prediction, zone 2: 90 < angle < 180
//
// High bit-depth (16-bit sample) version. Each pixel is first projected onto
// the above edge; if that position is left of the first usable above sample
// (index min_base_x), the pixel is projected onto the left edge instead.
// Either way the two nearest edge samples are blended with a 5-bit weight.
// bd is unused.
void av1_highbd_dr_prediction_z2_c(uint16_t *dst, ptrdiff_t stride, int bw,
                                   int bh, const uint16_t *above,
                                   const uint16_t *left, int upsample_above,
                                   int upsample_left, int dx, int dy, int bd) {
  (void)bd;
  assert(dx > 0);
  assert(dy > 0);

  const int min_base_x = -(1 << upsample_above);
  const int min_base_y = -(1 << upsample_left);
  (void)min_base_y;  // Only referenced by the assert below.
  const int frac_bits_x = 6 - upsample_above;
  const int frac_bits_y = 6 - upsample_left;

  for (int row = 0; row < bh; ++row, dst += stride) {
    for (int col = 0; col < bw; ++col) {
      const uint16_t *ref;  // First of the two edge samples to blend.
      int w_hi;             // Weight of ref[1], in 1/32 units.

      const int pos_x = (col << 6) - (row + 1) * dx;
      const int base_x = pos_x >> frac_bits_x;
      if (base_x >= min_base_x) {
        w_hi = ((pos_x * (1 << upsample_above)) & 0x3F) >> 1;
        ref = above + base_x;
      } else {
        const int pos_y = (row << 6) - (col + 1) * dy;
        const int base_y = pos_y >> frac_bits_y;
        assert(base_y >= min_base_y);
        w_hi = ((pos_y * (1 << upsample_left)) & 0x3F) >> 1;
        ref = left + base_y;
      }

      const int acc = ref[0] * (32 - w_hi) + ref[1] * w_hi;
      dst[col] = ROUND_POWER_OF_TWO(acc, 5);
    }
  }
}
729 
// Directional prediction, zone 3: 180 < angle < 270
//
// Only the left edge is read. Each column starts at a position that advances
// by dy per column (6 fractional bits); going down a column the edge index
// steps by 1 << upsample_left. Once the index reaches
// (bw + bh - 1) << upsample_left the rest of the column is filled with the
// edge sample at that index. `above`, `dx` and `bd` are unused.
void av1_highbd_dr_prediction_z3_c(uint16_t *dst, ptrdiff_t stride, int bw,
                                   int bh, const uint16_t *above,
                                   const uint16_t *left, int upsample_left,
                                   int dx, int dy, int bd) {
  (void)above;
  (void)dx;
  (void)bd;
  assert(dx == 1);
  assert(dy > 0);

  const int last_idx = (bw + bh - 1) << upsample_left;
  const int frac_bits = 6 - upsample_left;
  const int idx_step = 1 << upsample_left;

  int pos = dy;
  for (int col = 0; col < bw; ++col, pos += dy) {
    const int frac = ((pos << upsample_left) & 0x3F) >> 1;
    int idx = pos >> frac_bits;
    int row = 0;

    // Interpolated part of the column.
    for (; row < bh && idx < last_idx; ++row, idx += idx_step) {
      const int blend = left[idx] * (32 - frac) + left[idx + 1] * frac;
      dst[row * stride + col] = ROUND_POWER_OF_TWO(blend, 5);
    }

    // Past the end of the edge: replicate the last edge sample.
    for (; row < bh; ++row) {
      dst[row * stride + col] = left[last_idx];
    }
  }
}
762 
// Selects the high-bitdepth directional predictor for `angle` (degrees):
// exactly 90 or 180 use the V_PRED / H_PRED entries of pred_high, the open
// ranges (0, 90), (90, 180) and (180, 270) use the zone 1, 2 and 3 predictors.
// Any other angle writes nothing (and trips the assert in debug builds).
static void highbd_dr_predictor(uint16_t *dst, ptrdiff_t stride,
                                TX_SIZE tx_size, const uint16_t *above,
                                const uint16_t *left, int upsample_above,
                                int upsample_left, int angle, int bd) {
  const int dx = av1_get_dx(angle);
  const int dy = av1_get_dy(angle);
  const int width = tx_size_wide[tx_size];
  const int height = tx_size_high[tx_size];
  assert(angle > 0 && angle < 270);

  if (angle == 90) {
    pred_high[V_PRED][tx_size](dst, stride, above, left, bd);
    return;
  }
  if (angle == 180) {
    pred_high[H_PRED][tx_size](dst, stride, above, left, bd);
    return;
  }
  if (angle <= 0 || angle >= 270) {
    return;
  }

  if (angle < 90) {
    av1_highbd_dr_prediction_z1(dst, stride, width, height, above, left,
                                upsample_above, dx, dy, bd);
  } else if (angle < 180) {
    av1_highbd_dr_prediction_z2(dst, stride, width, height, above, left,
                                upsample_above, upsample_left, dx, dy, bd);
  } else {
    av1_highbd_dr_prediction_z3(dst, stride, width, height, above, left,
                                upsample_left, dx, dy, bd);
  }
}
788 
// Filter-intra weights, indexed [mode][k][tap].
//
// For a given mode, row k (0..7) holds the weights used for output pixel k of
// a 4-wide by 2-high patch: the predictors in this file write it at row
// offset k >> 2 and column offset k & 3. Taps 0..6 multiply the seven
// neighbours p0..p6 (p0 = above-left, p1..p4 = the four pixels above,
// p5/p6 = the two pixels to the left). The eighth entry of every row is 0 and
// is not read by the C predictors in this file.
//
// Taps 0..6 of every row sum to 16; the predictors scale the weighted sum
// back down with FILTER_INTRA_SCALE_BITS (defined elsewhere).
DECLARE_ALIGNED(16, const int8_t,
                av1_filter_intra_taps[FILTER_INTRA_MODES][8][8]) = {
  // mode index 0
  {
      { -6, 10, 0, 0, 0, 12, 0, 0 },
      { -5, 2, 10, 0, 0, 9, 0, 0 },
      { -3, 1, 1, 10, 0, 7, 0, 0 },
      { -3, 1, 1, 2, 10, 5, 0, 0 },
      { -4, 6, 0, 0, 0, 2, 12, 0 },
      { -3, 2, 6, 0, 0, 2, 9, 0 },
      { -3, 2, 2, 6, 0, 2, 7, 0 },
      { -3, 1, 2, 2, 6, 3, 5, 0 },
  },
  // mode index 1
  {
      { -10, 16, 0, 0, 0, 10, 0, 0 },
      { -6, 0, 16, 0, 0, 6, 0, 0 },
      { -4, 0, 0, 16, 0, 4, 0, 0 },
      { -2, 0, 0, 0, 16, 2, 0, 0 },
      { -10, 16, 0, 0, 0, 0, 10, 0 },
      { -6, 0, 16, 0, 0, 0, 6, 0 },
      { -4, 0, 0, 16, 0, 0, 4, 0 },
      { -2, 0, 0, 0, 16, 0, 2, 0 },
  },
  // mode index 2
  {
      { -8, 8, 0, 0, 0, 16, 0, 0 },
      { -8, 0, 8, 0, 0, 16, 0, 0 },
      { -8, 0, 0, 8, 0, 16, 0, 0 },
      { -8, 0, 0, 0, 8, 16, 0, 0 },
      { -4, 4, 0, 0, 0, 0, 16, 0 },
      { -4, 0, 4, 0, 0, 0, 16, 0 },
      { -4, 0, 0, 4, 0, 0, 16, 0 },
      { -4, 0, 0, 0, 4, 0, 16, 0 },
  },
  // mode index 3
  {
      { -2, 8, 0, 0, 0, 10, 0, 0 },
      { -1, 3, 8, 0, 0, 6, 0, 0 },
      { -1, 2, 3, 8, 0, 4, 0, 0 },
      { 0, 1, 2, 3, 8, 2, 0, 0 },
      { -1, 4, 0, 0, 0, 3, 10, 0 },
      { -1, 3, 4, 0, 0, 4, 6, 0 },
      { -1, 2, 3, 4, 0, 4, 4, 0 },
      { -1, 2, 2, 3, 4, 3, 3, 0 },
  },
  // mode index 4
  {
      { -12, 14, 0, 0, 0, 14, 0, 0 },
      { -10, 0, 14, 0, 0, 12, 0, 0 },
      { -9, 0, 0, 14, 0, 11, 0, 0 },
      { -8, 0, 0, 0, 14, 10, 0, 0 },
      { -10, 12, 0, 0, 0, 0, 14, 0 },
      { -9, 1, 12, 0, 0, 0, 12, 0 },
      { -8, 0, 0, 12, 0, 1, 11, 0 },
      { -7, 0, 0, 1, 12, 1, 9, 0 },
  },
};
842 
// Filter-intra prediction for 8-bit samples.
//
// A (bh + 1) x (bw + 1) working grid is seeded with above[-1 .. bw - 1] in its
// first row and left[0 .. bh - 1] in its first column. The block is then
// produced in 4x2 patches: each patch pixel is a weighted sum (weights from
// av1_filter_intra_taps[mode]) of the seven grid samples bordering the patch
// on the top and left, rounded, scaled and clipped. Patches therefore feed on
// previously predicted patches. The interior of the grid is finally copied to
// dst. Block dimensions must not exceed 32x32.
void av1_filter_intra_predictor_c(uint8_t *dst, ptrdiff_t stride,
                                  TX_SIZE tx_size, const uint8_t *above,
                                  const uint8_t *left, int mode) {
  uint8_t grid[33][33];
  const int bw = tx_size_wide[tx_size];
  const int bh = tx_size_high[tx_size];

  assert(bw <= 32 && bh <= 32);

  // The initialization is just for silencing Jenkins static analysis warnings
  for (int row = 0; row <= bh; ++row) {
    memset(grid[row], 0, (bw + 1) * sizeof(grid[0][0]));
  }

  // Seed the top row (including the above-left corner) and the left column.
  memcpy(grid[0], above - 1, (bw + 1) * sizeof(uint8_t));
  for (int row = 0; row < bh; ++row) {
    grid[row + 1][0] = left[row];
  }

  for (int row = 1; row <= bh; row += 2) {
    for (int col = 1; col <= bw; col += 4) {
      // Capture the seven neighbours before any pixel of this patch is
      // written.
      const int nbr[7] = {
        grid[row - 1][col - 1], grid[row - 1][col],     grid[row - 1][col + 1],
        grid[row - 1][col + 2], grid[row - 1][col + 3], grid[row][col - 1],
        grid[row + 1][col - 1],
      };
      for (int k = 0; k < 8; ++k) {
        const int8_t *const taps = av1_filter_intra_taps[mode][k];
        int acc = 0;
        for (int t = 0; t < 7; ++t) {
          acc += taps[t] * nbr[t];
        }
        grid[row + (k >> 2)][col + (k & 0x03)] = clip_pixel(
            ROUND_POWER_OF_TWO_SIGNED(acc, FILTER_INTRA_SCALE_BITS));
      }
    }
  }

  for (int row = 0; row < bh; ++row) {
    memcpy(dst + row * stride, &grid[row + 1][1], bw * sizeof(uint8_t));
  }
}
890 
// Filter-intra prediction for 16-bit (high bit depth) samples.
//
// A (bh + 1) x (bw + 1) working grid is seeded with above[-1 .. bw - 1] in its
// first row and left[0 .. bh - 1] in its first column. The block is then
// produced in 4x2 patches: each patch pixel is a weighted sum (weights from
// av1_filter_intra_taps[mode]) of the seven grid samples bordering the patch
// on the top and left, rounded, scaled and clipped to `bd` bits. The interior
// of the grid is finally copied to dst. Block dimensions must not exceed
// 32x32.
static void highbd_filter_intra_predictor(uint16_t *dst, ptrdiff_t stride,
                                          TX_SIZE tx_size,
                                          const uint16_t *above,
                                          const uint16_t *left, int mode,
                                          int bd) {
  uint16_t grid[33][33];
  const int bw = tx_size_wide[tx_size];
  const int bh = tx_size_high[tx_size];

  assert(bw <= 32 && bh <= 32);

  // The initialization is just for silencing Jenkins static analysis warnings
  for (int row = 0; row <= bh; ++row) {
    memset(grid[row], 0, (bw + 1) * sizeof(grid[0][0]));
  }

  // Seed the top row (including the above-left corner) and the left column.
  memcpy(grid[0], above - 1, (bw + 1) * sizeof(grid[0][0]));
  for (int row = 0; row < bh; ++row) {
    grid[row + 1][0] = left[row];
  }

  for (int row = 1; row <= bh; row += 2) {
    for (int col = 1; col <= bw; col += 4) {
      // Capture the seven neighbours before any pixel of this patch is
      // written.
      const int nbr[7] = {
        grid[row - 1][col - 1], grid[row - 1][col],     grid[row - 1][col + 1],
        grid[row - 1][col + 2], grid[row - 1][col + 3], grid[row][col - 1],
        grid[row + 1][col - 1],
      };
      for (int k = 0; k < 8; ++k) {
        const int8_t *const taps = av1_filter_intra_taps[mode][k];
        int acc = 0;
        for (int t = 0; t < 7; ++t) {
          acc += taps[t] * nbr[t];
        }
        grid[row + (k >> 2)][col + (k & 0x03)] = clip_pixel_highbd(
            ROUND_POWER_OF_TWO_SIGNED(acc, FILTER_INTRA_SCALE_BITS), bd);
      }
    }
  }

  for (int row = 0; row < bh; ++row) {
    memcpy(dst + row * stride, &grid[row + 1][1], bw * sizeof(dst[0]));
  }
}
941 
is_smooth(const MB_MODE_INFO * mbmi,int plane)942 static int is_smooth(const MB_MODE_INFO *mbmi, int plane) {
943   if (plane == 0) {
944     const PREDICTION_MODE mode = mbmi->mode;
945     return (mode == SMOOTH_PRED || mode == SMOOTH_V_PRED ||
946             mode == SMOOTH_H_PRED);
947   } else {
948     // uv_mode is not set for inter blocks, so need to explicitly
949     // detect that case.
950     if (is_inter_block(mbmi)) return 0;
951 
952     const UV_PREDICTION_MODE uv_mode = mbmi->uv_mode;
953     return (uv_mode == UV_SMOOTH_PRED || uv_mode == UV_SMOOTH_V_PRED ||
954             uv_mode == UV_SMOOTH_H_PRED);
955   }
956 }
957 
get_filt_type(const MACROBLOCKD * xd,int plane)958 static int get_filt_type(const MACROBLOCKD *xd, int plane) {
959   int ab_sm, le_sm;
960 
961   if (plane == 0) {
962     const MB_MODE_INFO *ab = xd->above_mbmi;
963     const MB_MODE_INFO *le = xd->left_mbmi;
964     ab_sm = ab ? is_smooth(ab, plane) : 0;
965     le_sm = le ? is_smooth(le, plane) : 0;
966   } else {
967     const MB_MODE_INFO *ab = xd->chroma_above_mbmi;
968     const MB_MODE_INFO *le = xd->chroma_left_mbmi;
969     ab_sm = ab ? is_smooth(ab, plane) : 0;
970     le_sm = le ? is_smooth(le, plane) : 0;
971   }
972 
973   return (ab_sm || le_sm) ? 1 : 0;
974 }
975 
// Picks the intra edge filter strength (0 = no filtering, up to 3).
//
// bs0, bs1: the two block dimensions; only their sum matters.
// delta:    signed angle offset; only its magnitude matters.
// type:     0 selects the first threshold set, any other value the second.
// Larger blocks and larger |delta| give stronger filtering.
static int intra_edge_filter_strength(int bs0, int bs1, int delta, int type) {
  const int mag = abs(delta);
  const int size_sum = bs0 + bs1;

  if (type != 0) {
    if (size_sum <= 8) return (mag >= 64) ? 2 : (mag >= 40) ? 1 : 0;
    if (size_sum <= 16) return (mag >= 48) ? 2 : (mag >= 20) ? 1 : 0;
    if (size_sum <= 24) return (mag >= 4) ? 3 : 0;
    return (mag >= 1) ? 3 : 0;
  }

  if (size_sum <= 8) return (mag >= 56) ? 1 : 0;
  // Sums in (8, 12] and (12, 16] share the same threshold.
  if (size_sum <= 16) return (mag >= 40) ? 1 : 0;
  if (size_sum <= 24) {
    return (mag >= 32) ? 3 : (mag >= 16) ? 2 : (mag >= 8) ? 1 : 0;
  }
  if (size_sum <= 32) {
    return (mag >= 32) ? 3 : (mag >= 4) ? 2 : (mag >= 1) ? 1 : 0;
  }
  return (mag >= 1) ? 3 : 0;
}
1014 
av1_filter_intra_edge_c(uint8_t * p,int sz,int strength)1015 void av1_filter_intra_edge_c(uint8_t *p, int sz, int strength) {
1016   if (!strength) return;
1017 
1018   const int kernel[INTRA_EDGE_FILT][INTRA_EDGE_TAPS] = {
1019     { 0, 4, 8, 4, 0 }, { 0, 5, 6, 5, 0 }, { 2, 4, 4, 4, 2 }
1020   };
1021   const int filt = strength - 1;
1022   uint8_t edge[129];
1023 
1024   memcpy(edge, p, sz * sizeof(*p));
1025   for (int i = 1; i < sz; i++) {
1026     int s = 0;
1027     for (int j = 0; j < INTRA_EDGE_TAPS; j++) {
1028       int k = i - 2 + j;
1029       k = (k < 0) ? 0 : k;
1030       k = (k > sz - 1) ? sz - 1 : k;
1031       s += edge[k] * kernel[filt][j];
1032     }
1033     s = (s + 8) >> 4;
1034     p[i] = s;
1035   }
1036 }
1037 
// Smooths the above-left corner sample with a {5, 6, 5} / 16 kernel applied
// to left[0], the corner (p_above[-1]) and above[0], then stores the rounded
// result in both p_above[-1] and p_left[-1].
static void filter_intra_edge_corner(uint8_t *p_above, uint8_t *p_left) {
  const int side_weight = 5;
  const int corner_weight = 6;

  const int weighted = (p_left[0] * side_weight) +
                       (p_above[-1] * corner_weight) +
                       (p_above[0] * side_weight);
  const uint8_t smoothed = (uint8_t)((weighted + 8) >> 4);

  p_above[-1] = smoothed;
  p_left[-1] = smoothed;
}
1047 
av1_filter_intra_edge_high_c(uint16_t * p,int sz,int strength)1048 void av1_filter_intra_edge_high_c(uint16_t *p, int sz, int strength) {
1049   if (!strength) return;
1050 
1051   const int kernel[INTRA_EDGE_FILT][INTRA_EDGE_TAPS] = {
1052     { 0, 4, 8, 4, 0 }, { 0, 5, 6, 5, 0 }, { 2, 4, 4, 4, 2 }
1053   };
1054   const int filt = strength - 1;
1055   uint16_t edge[129];
1056 
1057   memcpy(edge, p, sz * sizeof(*p));
1058   for (int i = 1; i < sz; i++) {
1059     int s = 0;
1060     for (int j = 0; j < INTRA_EDGE_TAPS; j++) {
1061       int k = i - 2 + j;
1062       k = (k < 0) ? 0 : k;
1063       k = (k > sz - 1) ? sz - 1 : k;
1064       s += edge[k] * kernel[filt][j];
1065     }
1066     s = (s + 8) >> 4;
1067     p[i] = s;
1068   }
1069 }
1070 
// 16-bit variant of the corner smoother: applies a {5, 6, 5} / 16 kernel to
// left[0], the corner (p_above[-1]) and above[0], then stores the rounded
// result in both p_above[-1] and p_left[-1].
static void filter_intra_edge_corner_high(uint16_t *p_above, uint16_t *p_left) {
  const int side_weight = 5;
  const int corner_weight = 6;

  const int weighted = (p_left[0] * side_weight) +
                       (p_above[-1] * corner_weight) +
                       (p_above[0] * side_weight);
  const uint16_t smoothed = (uint16_t)((weighted + 8) >> 4);

  p_above[-1] = smoothed;
  p_left[-1] = smoothed;
}
1080 
av1_upsample_intra_edge_c(uint8_t * p,int sz)1081 void av1_upsample_intra_edge_c(uint8_t *p, int sz) {
1082   // interpolate half-sample positions
1083   assert(sz <= MAX_UPSAMPLE_SZ);
1084 
1085   uint8_t in[MAX_UPSAMPLE_SZ + 3];
1086   // copy p[-1..(sz-1)] and extend first and last samples
1087   in[0] = p[-1];
1088   in[1] = p[-1];
1089   for (int i = 0; i < sz; i++) {
1090     in[i + 2] = p[i];
1091   }
1092   in[sz + 2] = p[sz - 1];
1093 
1094   // interpolate half-sample edge positions
1095   p[-2] = in[0];
1096   for (int i = 0; i < sz; i++) {
1097     int s = -in[i] + (9 * in[i + 1]) + (9 * in[i + 2]) - in[i + 3];
1098     s = clip_pixel((s + 8) >> 4);
1099     p[2 * i - 1] = s;
1100     p[2 * i] = in[i + 2];
1101   }
1102 }
1103 
av1_upsample_intra_edge_high_c(uint16_t * p,int sz,int bd)1104 void av1_upsample_intra_edge_high_c(uint16_t *p, int sz, int bd) {
1105   // interpolate half-sample positions
1106   assert(sz <= MAX_UPSAMPLE_SZ);
1107 
1108   uint16_t in[MAX_UPSAMPLE_SZ + 3];
1109   // copy p[-1..(sz-1)] and extend first and last samples
1110   in[0] = p[-1];
1111   in[1] = p[-1];
1112   for (int i = 0; i < sz; i++) {
1113     in[i + 2] = p[i];
1114   }
1115   in[sz + 2] = p[sz - 1];
1116 
1117   // interpolate half-sample edge positions
1118   p[-2] = in[0];
1119   for (int i = 0; i < sz; i++) {
1120     int s = -in[i] + (9 * in[i + 1]) + (9 * in[i + 2]) - in[i + 3];
1121     s = (s + 8) >> 4;
1122     s = clip_pixel_highbd(s, bd);
1123     p[2 * i - 1] = s;
1124     p[2 * i] = in[i + 2];
1125   }
1126 }
1127 
// Builds the intra prediction for one transform block in a high-bitdepth
// (16-bit sample) buffer.
//
// ref8 and dst8 are converted to uint16_t pointers with CONVERT_TO_SHORTPTR.
// The function copies the neighbouring pixels around `ref` into local
// above/left edge arrays (padding, or substituting defaults derived from
// xd->bd, where neighbours are missing), optionally filters and upsamples
// those edges for directional modes, and then calls the matching predictor
// to write txwpx x txhpx samples to dst.
//
// n_top_px, n_topright_px, n_left_px and n_bottomleft_px are the numbers of
// reference pixels read from each neighbouring region; all must be >= 0.
// filter_intra_mode == FILTER_INTRA_MODES means filter-intra is not used.
// disable_edge_filter != 0 skips edge filtering and upsampling.
static void build_intra_predictors_high(
    const MACROBLOCKD *xd, const uint8_t *ref8, int ref_stride, uint8_t *dst8,
    int dst_stride, PREDICTION_MODE mode, int angle_delta,
    FILTER_INTRA_MODE filter_intra_mode, TX_SIZE tx_size,
    int disable_edge_filter, int n_top_px, int n_topright_px, int n_left_px,
    int n_bottomleft_px, int plane) {
  int i;
  uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
  uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
  DECLARE_ALIGNED(16, uint16_t, left_data[MAX_TX_SIZE * 2 + 32]);
  DECLARE_ALIGNED(16, uint16_t, above_data[MAX_TX_SIZE * 2 + 32]);
  // The edges start 16 entries into their backing arrays so that negative
  // indices (e.g. above_row[-1], and p[-2] written by the upsampler) stay
  // inside the arrays.
  uint16_t *const above_row = above_data + 16;
  uint16_t *const left_col = left_data + 16;
  const int txwpx = tx_size_wide[tx_size];
  const int txhpx = tx_size_high[tx_size];
  int need_left = extend_modes[mode] & NEED_LEFT;
  int need_above = extend_modes[mode] & NEED_ABOVE;
  int need_above_left = extend_modes[mode] & NEED_ABOVELEFT;
  const uint16_t *above_ref = ref - ref_stride;
  const uint16_t *left_ref = ref - 1;
  int p_angle = 0;
  const int is_dr_mode = av1_is_directional_mode(mode);
  const int use_filter_intra = filter_intra_mode != FILTER_INTRA_MODES;
  // Mid-grey for the current bit depth (128 scaled up from 8 bits).
  int base = 128 << (xd->bd - 8);

  // The default values if ref pixels are not available:
  // base-1 base-1 base-1 .. base-1 base-1 base-1 base-1 base-1 base-1
  // base+1   A      B  ..     Y      Z
  // base+1   C      D  ..     W      X
  // base+1   E      F  ..     U      V
  // base+1   G      H  ..     S      T      T      T      T      T

  // For directional modes the edge requirements follow from the final angle
  // rather than from the extend_modes table.
  if (is_dr_mode) {
    p_angle = mode_to_angle_map[mode] + angle_delta;
    if (p_angle <= 90)
      need_above = 1, need_left = 0, need_above_left = 1;
    else if (p_angle < 180)
      need_above = 1, need_left = 1, need_above_left = 1;
    else
      need_above = 0, need_left = 1, need_above_left = 1;
  }
  // Filter-intra reads both edges and the corner.
  if (use_filter_intra) need_left = need_above = need_above_left = 1;

  assert(n_top_px >= 0);
  assert(n_topright_px >= 0);
  assert(n_left_px >= 0);
  assert(n_bottomleft_px >= 0);

  // Early out: one edge is not needed and the other edge has no available
  // pixels. The block is filled with a single value: the first pixel of the
  // remaining neighbour if it exists, otherwise the default.
  if ((!need_above && n_left_px == 0) || (!need_left && n_top_px == 0)) {
    int val;
    if (need_left) {
      val = (n_top_px > 0) ? above_ref[0] : base + 1;
    } else {
      val = (n_left_px > 0) ? left_ref[0] : base - 1;
    }
    for (i = 0; i < txhpx; ++i) {
      aom_memset16(dst, val, txwpx);
      dst += dst_stride;
    }
    return;
  }

  // NEED_LEFT
  if (need_left) {
    int need_bottom = !!(extend_modes[mode] & NEED_BOTTOMLEFT);
    if (use_filter_intra) need_bottom = 0;
    if (is_dr_mode) need_bottom = p_angle > 180;
    const int num_left_pixels_needed = txhpx + (need_bottom ? txwpx : 0);
    i = 0;
    if (n_left_px > 0) {
      // Copy the available left (and bottom-left) column, then replicate the
      // last copied pixel over whatever is still missing.
      for (; i < n_left_px; i++) left_col[i] = left_ref[i * ref_stride];
      if (need_bottom && n_bottomleft_px > 0) {
        assert(i == txhpx);
        for (; i < txhpx + n_bottomleft_px; i++)
          left_col[i] = left_ref[i * ref_stride];
      }
      if (i < num_left_pixels_needed)
        aom_memset16(&left_col[i], left_col[i - 1], num_left_pixels_needed - i);
    } else {
      // No left neighbour: fall back to the first above pixel, or to the
      // default left value.
      if (n_top_px > 0) {
        aom_memset16(left_col, above_ref[0], num_left_pixels_needed);
      } else {
        aom_memset16(left_col, base + 1, num_left_pixels_needed);
      }
    }
  }

  // NEED_ABOVE
  if (need_above) {
    int need_right = !!(extend_modes[mode] & NEED_ABOVERIGHT);
    if (use_filter_intra) need_right = 0;
    if (is_dr_mode) need_right = p_angle < 90;
    const int num_top_pixels_needed = txwpx + (need_right ? txhpx : 0);
    if (n_top_px > 0) {
      // Copy the available above (and above-right) row, then replicate the
      // last copied pixel over whatever is still missing.
      memcpy(above_row, above_ref, n_top_px * sizeof(above_ref[0]));
      i = n_top_px;
      if (need_right && n_topright_px > 0) {
        assert(n_top_px == txwpx);
        memcpy(above_row + txwpx, above_ref + txwpx,
               n_topright_px * sizeof(above_ref[0]));
        i += n_topright_px;
      }
      if (i < num_top_pixels_needed)
        aom_memset16(&above_row[i], above_row[i - 1],
                     num_top_pixels_needed - i);
    } else {
      // No above neighbour: fall back to the first left pixel, or to the
      // default above value.
      if (n_left_px > 0) {
        aom_memset16(above_row, left_ref[0], num_top_pixels_needed);
      } else {
        aom_memset16(above_row, base - 1, num_top_pixels_needed);
      }
    }
  }

  // Above-left corner: the real corner pixel when both neighbours exist,
  // otherwise the first pixel of whichever neighbour exists, otherwise base.
  // The same value is mirrored into left_col[-1].
  if (need_above_left) {
    if (n_top_px > 0 && n_left_px > 0) {
      above_row[-1] = above_ref[-1];
    } else if (n_top_px > 0) {
      above_row[-1] = above_ref[0];
    } else if (n_left_px > 0) {
      above_row[-1] = left_ref[0];
    } else {
      above_row[-1] = base;
    }
    left_col[-1] = above_row[-1];
  }

  if (use_filter_intra) {
    highbd_filter_intra_predictor(dst, dst_stride, tx_size, above_row, left_col,
                                  filter_intra_mode, xd->bd);
    return;
  }

  if (is_dr_mode) {
    int upsample_above = 0;
    int upsample_left = 0;
    if (!disable_edge_filter) {
      const int need_right = p_angle < 90;
      const int need_bottom = p_angle > 180;
      const int filt_type = get_filt_type(xd, plane);
      // Exactly vertical / horizontal angles skip the edge smoothing.
      if (p_angle != 90 && p_angle != 180) {
        // ab_le is 1 when the corner sample is part of the filtered edge.
        const int ab_le = need_above_left ? 1 : 0;
        if (need_above && need_left && (txwpx + txhpx >= 24)) {
          filter_intra_edge_corner_high(above_row, left_col);
        }
        if (need_above && n_top_px > 0) {
          const int strength =
              intra_edge_filter_strength(txwpx, txhpx, p_angle - 90, filt_type);
          const int n_px = n_top_px + ab_le + (need_right ? txhpx : 0);
          av1_filter_intra_edge_high(above_row - ab_le, n_px, strength);
        }
        if (need_left && n_left_px > 0) {
          const int strength = intra_edge_filter_strength(
              txhpx, txwpx, p_angle - 180, filt_type);
          const int n_px = n_left_px + ab_le + (need_bottom ? txwpx : 0);
          av1_filter_intra_edge_high(left_col - ab_le, n_px, strength);
        }
      }
      // Upsampling decisions are passed on to the directional predictor.
      upsample_above =
          av1_use_intra_edge_upsample(txwpx, txhpx, p_angle - 90, filt_type);
      if (need_above && upsample_above) {
        const int n_px = txwpx + (need_right ? txhpx : 0);
        av1_upsample_intra_edge_high(above_row, n_px, xd->bd);
      }
      upsample_left =
          av1_use_intra_edge_upsample(txhpx, txwpx, p_angle - 180, filt_type);
      if (need_left && upsample_left) {
        const int n_px = txhpx + (need_bottom ? txwpx : 0);
        av1_upsample_intra_edge_high(left_col, n_px, xd->bd);
      }
    }
    highbd_dr_predictor(dst, dst_stride, tx_size, above_row, left_col,
                        upsample_above, upsample_left, p_angle, xd->bd);
    return;
  }

  // predict
  if (mode == DC_PRED) {
    // The DC predictor variant is selected by which neighbours are available.
    dc_pred_high[n_left_px > 0][n_top_px > 0][tx_size](
        dst, dst_stride, above_row, left_col, xd->bd);
  } else {
    pred_high[mode][tx_size](dst, dst_stride, above_row, left_col, xd->bd);
  }
}
1312 
// Builds the intra prediction for one transform block in an 8-bit buffer.
//
// The function copies the neighbouring pixels around `ref` into local
// above/left edge arrays (padding, or substituting the defaults 127 / 129 /
// 128, where neighbours are missing), optionally filters and upsamples those
// edges for directional modes, and then calls the matching predictor to
// write txwpx x txhpx samples to dst.
//
// n_top_px, n_topright_px, n_left_px and n_bottomleft_px are the numbers of
// reference pixels read from each neighbouring region; all must be >= 0.
// filter_intra_mode == FILTER_INTRA_MODES means filter-intra is not used.
// disable_edge_filter != 0 skips edge filtering and upsampling.
static void build_intra_predictors(const MACROBLOCKD *xd, const uint8_t *ref,
                                   int ref_stride, uint8_t *dst, int dst_stride,
                                   PREDICTION_MODE mode, int angle_delta,
                                   FILTER_INTRA_MODE filter_intra_mode,
                                   TX_SIZE tx_size, int disable_edge_filter,
                                   int n_top_px, int n_topright_px,
                                   int n_left_px, int n_bottomleft_px,
                                   int plane) {
  int i;
  const uint8_t *above_ref = ref - ref_stride;
  const uint8_t *left_ref = ref - 1;
  DECLARE_ALIGNED(16, uint8_t, left_data[MAX_TX_SIZE * 2 + 32]);
  DECLARE_ALIGNED(16, uint8_t, above_data[MAX_TX_SIZE * 2 + 32]);
  // The edges start 16 entries into their backing arrays so that negative
  // indices (e.g. above_row[-1], and p[-2] written by the upsampler) stay
  // inside the arrays.
  uint8_t *const above_row = above_data + 16;
  uint8_t *const left_col = left_data + 16;
  const int txwpx = tx_size_wide[tx_size];
  const int txhpx = tx_size_high[tx_size];
  int need_left = extend_modes[mode] & NEED_LEFT;
  int need_above = extend_modes[mode] & NEED_ABOVE;
  int need_above_left = extend_modes[mode] & NEED_ABOVELEFT;
  int p_angle = 0;
  const int is_dr_mode = av1_is_directional_mode(mode);
  const int use_filter_intra = filter_intra_mode != FILTER_INTRA_MODES;

  // The default values if ref pixels are not available:
  // 127 127 127 .. 127 127 127 127 127 127
  // 129  A   B  ..  Y   Z
  // 129  C   D  ..  W   X
  // 129  E   F  ..  U   V
  // 129  G   H  ..  S   T   T   T   T   T
  // ..

  // For directional modes the edge requirements follow from the final angle
  // rather than from the extend_modes table.
  if (is_dr_mode) {
    p_angle = mode_to_angle_map[mode] + angle_delta;
    if (p_angle <= 90)
      need_above = 1, need_left = 0, need_above_left = 1;
    else if (p_angle < 180)
      need_above = 1, need_left = 1, need_above_left = 1;
    else
      need_above = 0, need_left = 1, need_above_left = 1;
  }
  // Filter-intra reads both edges and the corner.
  if (use_filter_intra) need_left = need_above = need_above_left = 1;

  assert(n_top_px >= 0);
  assert(n_topright_px >= 0);
  assert(n_left_px >= 0);
  assert(n_bottomleft_px >= 0);

  // Early out: one edge is not needed and the other edge has no available
  // pixels. The block is filled with a single value: the first pixel of the
  // remaining neighbour if it exists, otherwise the default.
  if ((!need_above && n_left_px == 0) || (!need_left && n_top_px == 0)) {
    int val;
    if (need_left) {
      val = (n_top_px > 0) ? above_ref[0] : 129;
    } else {
      val = (n_left_px > 0) ? left_ref[0] : 127;
    }
    for (i = 0; i < txhpx; ++i) {
      memset(dst, val, txwpx);
      dst += dst_stride;
    }
    return;
  }

  // NEED_LEFT
  if (need_left) {
    int need_bottom = !!(extend_modes[mode] & NEED_BOTTOMLEFT);
    if (use_filter_intra) need_bottom = 0;
    if (is_dr_mode) need_bottom = p_angle > 180;
    const int num_left_pixels_needed = txhpx + (need_bottom ? txwpx : 0);
    i = 0;
    if (n_left_px > 0) {
      // Copy the available left (and bottom-left) column, then replicate the
      // last copied pixel over whatever is still missing.
      for (; i < n_left_px; i++) left_col[i] = left_ref[i * ref_stride];
      if (need_bottom && n_bottomleft_px > 0) {
        assert(i == txhpx);
        for (; i < txhpx + n_bottomleft_px; i++)
          left_col[i] = left_ref[i * ref_stride];
      }
      if (i < num_left_pixels_needed)
        memset(&left_col[i], left_col[i - 1], num_left_pixels_needed - i);
    } else {
      // No left neighbour: fall back to the first above pixel, or to the
      // default left value.
      if (n_top_px > 0) {
        memset(left_col, above_ref[0], num_left_pixels_needed);
      } else {
        memset(left_col, 129, num_left_pixels_needed);
      }
    }
  }

  // NEED_ABOVE
  if (need_above) {
    int need_right = !!(extend_modes[mode] & NEED_ABOVERIGHT);
    if (use_filter_intra) need_right = 0;
    if (is_dr_mode) need_right = p_angle < 90;
    const int num_top_pixels_needed = txwpx + (need_right ? txhpx : 0);
    if (n_top_px > 0) {
      // Copy the available above (and above-right) row, then replicate the
      // last copied pixel over whatever is still missing.
      memcpy(above_row, above_ref, n_top_px);
      i = n_top_px;
      if (need_right && n_topright_px > 0) {
        assert(n_top_px == txwpx);
        memcpy(above_row + txwpx, above_ref + txwpx, n_topright_px);
        i += n_topright_px;
      }
      if (i < num_top_pixels_needed)
        memset(&above_row[i], above_row[i - 1], num_top_pixels_needed - i);
    } else {
      // No above neighbour: fall back to the first left pixel, or to the
      // default above value.
      if (n_left_px > 0) {
        memset(above_row, left_ref[0], num_top_pixels_needed);
      } else {
        memset(above_row, 127, num_top_pixels_needed);
      }
    }
  }

  // Above-left corner: the real corner pixel when both neighbours exist,
  // otherwise the first pixel of whichever neighbour exists, otherwise 128.
  // The same value is mirrored into left_col[-1].
  if (need_above_left) {
    if (n_top_px > 0 && n_left_px > 0) {
      above_row[-1] = above_ref[-1];
    } else if (n_top_px > 0) {
      above_row[-1] = above_ref[0];
    } else if (n_left_px > 0) {
      above_row[-1] = left_ref[0];
    } else {
      above_row[-1] = 128;
    }
    left_col[-1] = above_row[-1];
  }

  if (use_filter_intra) {
    av1_filter_intra_predictor(dst, dst_stride, tx_size, above_row, left_col,
                               filter_intra_mode);
    return;
  }

  if (is_dr_mode) {
    int upsample_above = 0;
    int upsample_left = 0;
    if (!disable_edge_filter) {
      const int need_right = p_angle < 90;
      const int need_bottom = p_angle > 180;
      const int filt_type = get_filt_type(xd, plane);
      // Exactly vertical / horizontal angles skip the edge smoothing.
      if (p_angle != 90 && p_angle != 180) {
        // ab_le is 1 when the corner sample is part of the filtered edge.
        const int ab_le = need_above_left ? 1 : 0;
        if (need_above && need_left && (txwpx + txhpx >= 24)) {
          filter_intra_edge_corner(above_row, left_col);
        }
        if (need_above && n_top_px > 0) {
          const int strength =
              intra_edge_filter_strength(txwpx, txhpx, p_angle - 90, filt_type);
          const int n_px = n_top_px + ab_le + (need_right ? txhpx : 0);
          av1_filter_intra_edge(above_row - ab_le, n_px, strength);
        }
        if (need_left && n_left_px > 0) {
          const int strength = intra_edge_filter_strength(
              txhpx, txwpx, p_angle - 180, filt_type);
          const int n_px = n_left_px + ab_le + (need_bottom ? txwpx : 0);
          av1_filter_intra_edge(left_col - ab_le, n_px, strength);
        }
      }
      // Upsampling decisions are passed on to the directional predictor.
      upsample_above =
          av1_use_intra_edge_upsample(txwpx, txhpx, p_angle - 90, filt_type);
      if (need_above && upsample_above) {
        const int n_px = txwpx + (need_right ? txhpx : 0);
        av1_upsample_intra_edge(above_row, n_px);
      }
      upsample_left =
          av1_use_intra_edge_upsample(txhpx, txwpx, p_angle - 180, filt_type);
      if (need_left && upsample_left) {
        const int n_px = txhpx + (need_bottom ? txwpx : 0);
        av1_upsample_intra_edge(left_col, n_px);
      }
    }
    dr_predictor(dst, dst_stride, tx_size, above_row, left_col, upsample_above,
                 upsample_left, p_angle);
    return;
  }

  // predict
  if (mode == DC_PRED) {
    // The DC predictor variant is selected by which neighbours are available.
    dc_pred[n_left_px > 0][n_top_px > 0][tx_size](dst, dst_stride, above_row,
                                                  left_col);
  } else {
    pred[mode][tx_size](dst, dst_stride, above_row, left_col);
  }
}
1495 
// Produces the intra prediction for one transform block of `plane` and
// writes it to `dst`.
//
// col_off / row_off locate the transform block; they are converted to pixel
// offsets by shifting with tx_size_wide_log2[0] / tx_size_high_log2[0].
// wpx is used as the row stride of the palette color-index map and, together
// with hpx, to measure how far the block is from the right / bottom frame
// edges.
//
// When use_palette is non-zero the block is filled directly from the palette
// (via the color-index map) and `ref` is not consulted. Otherwise the
// availability of the top, top-right, left and bottom-left neighbors is
// worked out and forwarded, as pixel counts, to build_intra_predictors() or
// build_intra_predictors_high() depending on is_cur_buf_hbd().
void av1_predict_intra_block(
    const AV1_COMMON *cm, const MACROBLOCKD *xd, int wpx, int hpx,
    TX_SIZE tx_size, PREDICTION_MODE mode, int angle_delta, int use_palette,
    FILTER_INTRA_MODE filter_intra_mode, const uint8_t *ref, int ref_stride,
    uint8_t *dst, int dst_stride, int col_off, int row_off, int plane) {
  const MB_MODE_INFO *const mbmi = xd->mi[0];
  const int tx_w_px = tx_size_wide[tx_size];
  const int tx_h_px = tx_size_high[tx_size];
  // Pixel position of this transform block.
  const int px_x = col_off << tx_size_wide_log2[0];
  const int px_y = row_off << tx_size_high_log2[0];

  if (use_palette) {
    // Luma uses entry 0 of the per-plane palette state, any other plane
    // uses entry 1.
    const int map_plane = plane != 0;
    const uint8_t *const index_map = xd->plane[map_plane].color_index_map +
                                     xd->color_index_map_offset[map_plane];
    const uint16_t *const colors =
        mbmi->palette_mode_info.palette_colors + plane * PALETTE_MAX_SIZE;

    if (is_cur_buf_hbd(xd)) {
      uint16_t *const dst16 = CONVERT_TO_SHORTPTR(dst);
      for (int row = 0; row < tx_h_px; ++row) {
        const uint8_t *const idx_row = index_map + (row + px_y) * wpx + px_x;
        uint16_t *const out_row = dst16 + row * dst_stride;
        for (int col = 0; col < tx_w_px; ++col) {
          out_row[col] = colors[idx_row[col]];
        }
      }
    } else {
      for (int row = 0; row < tx_h_px; ++row) {
        const uint8_t *const idx_row = index_map + (row + px_y) * wpx + px_x;
        uint8_t *const out_row = dst + row * dst_stride;
        for (int col = 0; col < tx_w_px; ++col) {
          out_row[col] = (uint8_t)colors[idx_row[col]];
        }
      }
    }
    return;
  }

  const struct macroblockd_plane *const pd = &xd->plane[plane];
  const int ss_x = pd->subsampling_x;
  const int ss_y = pd->subsampling_y;
  const int tx_w_units = tx_size_wide_unit[tx_size];
  const int tx_h_units = tx_size_high_unit[tx_size];

  // A transform block that is not in the first row / column of its
  // prediction block always has a top / left neighbor; otherwise fall back
  // to the block-level availability flags for this plane type.
  const int block_has_top =
      ss_y ? xd->chroma_up_available : xd->up_available;
  const int block_has_left =
      ss_x ? xd->chroma_left_available : xd->left_available;
  const int have_top = row_off || block_has_top;
  const int have_left = col_off || block_has_left;

  const int mi_row = -xd->mb_to_top_edge >> (3 + MI_SIZE_LOG2);
  const int mi_col = -xd->mb_to_left_edge >> (3 + MI_SIZE_LOG2);

  // Distance from the right edge of this prediction block to the frame
  // right edge.
  const int xr =
      (xd->mb_to_right_edge >> (3 + ss_x)) + (wpx - px_x - tx_w_px);
  // Distance from the bottom edge of this prediction block to the frame
  // bottom edge.
  const int yd =
      (xd->mb_to_bottom_edge >> (3 + ss_y)) + (hpx - px_y - tx_h_px);

  const int right_available =
      mi_col + ((col_off + tx_w_units) << ss_x) < xd->tile.mi_col_end;
  const int bottom_available =
      (yd > 0) &&
      (mi_row + ((row_off + tx_h_units) << ss_y) < xd->tile.mi_row_end);

  const PARTITION_TYPE partition = mbmi->partition;

  // force 4x4 chroma component block size.
  const BLOCK_SIZE bsize = scale_chroma_bsize(mbmi->sb_type, ss_x, ss_y);

  const int have_top_right =
      has_top_right(cm, bsize, mi_row, mi_col, have_top, right_available,
                    partition, tx_size, row_off, col_off, ss_x, ss_y);
  const int have_bottom_left =
      has_bottom_left(cm, bsize, mi_row, mi_col, bottom_available, have_left,
                      partition, tx_size, row_off, col_off, ss_x, ss_y);

  // Number of usable reference pixels along each edge, clamped by the
  // distance to the frame boundary.
  const int n_top_px = have_top ? AOMMIN(tx_w_px, xr + tx_w_px) : 0;
  const int n_topright_px = have_top_right ? AOMMIN(tx_w_px, xr) : 0;
  const int n_left_px = have_left ? AOMMIN(tx_h_px, yd + tx_h_px) : 0;
  const int n_bottomleft_px = have_bottom_left ? AOMMIN(tx_h_px, yd) : 0;

  const int disable_edge_filter = !cm->seq_params.enable_intra_edge_filter;

  if (is_cur_buf_hbd(xd)) {
    build_intra_predictors_high(xd, ref, ref_stride, dst, dst_stride, mode,
                                angle_delta, filter_intra_mode, tx_size,
                                disable_edge_filter, n_top_px, n_topright_px,
                                n_left_px, n_bottomleft_px, plane);
  } else {
    build_intra_predictors(xd, ref, ref_stride, dst, dst_stride, mode,
                           angle_delta, filter_intra_mode, tx_size,
                           disable_edge_filter, n_top_px, n_topright_px,
                           n_left_px, n_bottomleft_px, plane);
  }
}
1591 
// Convenience wrapper around av1_predict_intra_block() that pulls the
// prediction parameters for the transform block at (blk_col, blk_row) of
// `plane` out of xd->mi[0] and the plane's dst buffer.
//
// The same pointer/stride pair is passed as both reference and destination,
// so the prediction is built in place in the plane's dst buffer.
//
// For non-luma planes whose uv_mode is UV_CFL_PRED, the underlying
// prediction is either computed (and optionally stored through
// cfl_store_dc_pred()) or restored through cfl_load_dc_pred(), after which
// cfl_predict_block() is applied on top of it.
void av1_predict_intra_block_facade(const AV1_COMMON *cm, MACROBLOCKD *xd,
                                    int plane, int blk_col, int blk_row,
                                    TX_SIZE tx_size) {
  const MB_MODE_INFO *const mbmi = xd->mi[0];
  struct macroblockd_plane *const pd = &xd->plane[plane];
  const int is_luma = (plane == AOM_PLANE_Y);

  const int dst_stride = pd->dst.stride;
  uint8_t *dst =
      pd->dst.buf + ((blk_row * dst_stride + blk_col) << tx_size_wide_log2[0]);

  const PREDICTION_MODE mode =
      is_luma ? mbmi->mode : get_uv_mode(mbmi->uv_mode);
  const int use_palette = mbmi->palette_mode_info.palette_size[plane != 0] > 0;
  const int angle_delta = mbmi->angle_delta[plane != AOM_PLANE_Y] * ANGLE_STEP;

  // FILTER_INTRA_MODES is passed unless this is the luma plane and the
  // block has use_filter_intra set.
  FILTER_INTRA_MODE filter_intra_mode = FILTER_INTRA_MODES;
  if (is_luma && mbmi->filter_intra_mode_info.use_filter_intra) {
    filter_intra_mode = mbmi->filter_intra_mode_info.filter_intra_mode;
  }

  // Everything except CFL chroma is a plain forward to the predictor.
  if (plane == AOM_PLANE_Y || mbmi->uv_mode != UV_CFL_PRED) {
    av1_predict_intra_block(cm, xd, pd->width, pd->height, tx_size, mode,
                            angle_delta, use_palette, filter_intra_mode, dst,
                            dst_stride, dst, dst_stride, blk_col, blk_row,
                            plane);
    return;
  }

#if CONFIG_DEBUG
  assert(is_cfl_allowed(xd));
  const BLOCK_SIZE plane_bsize = get_plane_block_size(
      mbmi->sb_type, pd->subsampling_x, pd->subsampling_y);
  (void)plane_bsize;
  assert(plane_bsize < BLOCK_SIZES_ALL);
  if (!xd->lossless[mbmi->segment_id]) {
    // Outside lossless mode a CFL block is expected to be covered by a
    // single transform block of the same size.
    assert(blk_col == 0);
    assert(blk_row == 0);
    assert(block_size_wide[plane_bsize] == tx_size_wide[tx_size]);
    assert(block_size_high[plane_bsize] == tx_size_high[tx_size]);
  }
#endif

  CFL_CTX *const cfl = &xd->cfl;
  const CFL_PRED_TYPE pred_plane = get_cfl_pred_type(plane);

  if (cfl->dc_pred_is_cached[pred_plane]) {
    // Reuse the previously stored prediction for this plane.
    cfl_load_dc_pred(xd, dst, dst_stride, tx_size, pred_plane);
  } else {
    av1_predict_intra_block(cm, xd, pd->width, pd->height, tx_size, mode,
                            angle_delta, use_palette, filter_intra_mode, dst,
                            dst_stride, dst, dst_stride, blk_col, blk_row,
                            plane);
    if (cfl->use_dc_pred_cache) {
      cfl_store_dc_pred(xd, dst, pred_plane, tx_size_wide[tx_size]);
      cfl->dc_pred_is_cached[pred_plane] = 1;
    }
  }

  cfl_predict_block(xd, dst, dst_stride, tx_size, plane);
}
1644 
// Public entry point for setting up the intra predictors: hands
// init_intra_predictors_internal to aom_once().
// NOTE(review): aom_once() presumably guarantees the callback runs at most
// once no matter how often this is called -- confirm against
// aom_ports/aom_once.h.
void av1_init_intra_predictors(void) {
  aom_once(init_intra_predictors_internal);
}
1648