1 /*
2  * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3  *
4  * This source code is subject to the terms of the BSD 2 Clause License and
5  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6  * was not distributed with this source code in the LICENSE file, you can
7  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8  * Media Patent License 1.0 was not distributed with this source code in the
9  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10  */
11 
12 #include <math.h>
13 
14 #include "config/aom_config.h"
15 #include "config/aom_dsp_rtcd.h"
16 #include "config/av1_rtcd.h"
17 
18 #include "aom_dsp/aom_dsp_common.h"
19 #include "aom_mem/aom_mem.h"
20 #include "aom_ports/aom_once.h"
21 #include "aom_ports/mem.h"
22 #include "aom_ports/system_state.h"
23 #include "av1/common/av1_common_int.h"
24 #include "av1/common/cfl.h"
25 #include "av1/common/reconintra.h"
26 
// Bit flags naming the reference edges an intra prediction mode reads.
// Each flag occupies its own bit so they can be OR-ed together (see
// extend_modes). Bit 0 is intentionally left unused.
enum {
  NEED_LEFT = 0x02,        // bit 1: left column
  NEED_ABOVE = 0x04,       // bit 2: above row
  NEED_ABOVERIGHT = 0x08,  // bit 3: above-right extension
  NEED_ABOVELEFT = 0x10,   // bit 4: above-left corner
  NEED_BOTTOMLEFT = 0x20,  // bit 5: bottom-left extension
};

// Intra edge filter / upsampling constants.
// NOTE(review): their consumers are outside this part of the file; the
// meanings below are inferred from the names only - confirm before relying
// on them.
#define INTRA_EDGE_FILT 3   // presumably the number of edge filter kernels
#define INTRA_EDGE_TAPS 5   // presumably the taps per edge filter kernel
#define MAX_UPSAMPLE_SZ 16  // presumably the longest edge that is upsampled
38 
39 static const uint8_t extend_modes[INTRA_MODES] = {
40   NEED_ABOVE | NEED_LEFT,                   // DC
41   NEED_ABOVE,                               // V
42   NEED_LEFT,                                // H
43   NEED_ABOVE | NEED_ABOVERIGHT,             // D45
44   NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT,  // D135
45   NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT,  // D113
46   NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT,  // D157
47   NEED_LEFT | NEED_BOTTOMLEFT,              // D203
48   NEED_ABOVE | NEED_ABOVERIGHT,             // D67
49   NEED_LEFT | NEED_ABOVE,                   // SMOOTH
50   NEED_LEFT | NEED_ABOVE,                   // SMOOTH_V
51   NEED_LEFT | NEED_ABOVE,                   // SMOOTH_H
52   NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT,  // PAETH
53 };
54 
55 // Tables to store if the top-right reference pixels are available. The flags
56 // are represented with bits, packed into 8-bit integers. E.g., for the 32x32
57 // blocks in a 128x128 superblock, the index of the "o" block is 10 (in raster
58 // order), so its flag is stored at the 3rd bit of the 2nd entry in the table,
59 // i.e. (table[10 / 8] >> (10 % 8)) & 1.
60 //       . . . .
61 //       . . . .
62 //       . . o .
63 //       . . . .
// Packed top-right availability bitmaps, one array per block size. Bit
// (i % 8) of byte (i / 8) is the flag for the block with raster index i.
// These are read-only lookup data, so they are declared const: nothing may
// modify them at run time and they are only reached through the
// const-qualified pointers stored in has_tr_tables / has_tr_vert_tables.
static const uint8_t has_tr_4x4[128] = {
  255, 255, 255, 255, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  127, 127, 127, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  255, 127, 255, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  127, 127, 127, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  255, 255, 255, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  127, 127, 127, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  255, 127, 255, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  127, 127, 127, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
};
static const uint8_t has_tr_4x8[64] = {
  255, 255, 255, 255, 119, 119, 119, 119, 127, 127, 127, 127, 119,
  119, 119, 119, 255, 127, 255, 127, 119, 119, 119, 119, 127, 127,
  127, 127, 119, 119, 119, 119, 255, 255, 255, 127, 119, 119, 119,
  119, 127, 127, 127, 127, 119, 119, 119, 119, 255, 127, 255, 127,
  119, 119, 119, 119, 127, 127, 127, 127, 119, 119, 119, 119,
};
static const uint8_t has_tr_8x4[64] = {
  255, 255, 0, 0, 85, 85, 0, 0, 119, 119, 0, 0, 85, 85, 0, 0,
  127, 127, 0, 0, 85, 85, 0, 0, 119, 119, 0, 0, 85, 85, 0, 0,
  255, 127, 0, 0, 85, 85, 0, 0, 119, 119, 0, 0, 85, 85, 0, 0,
  127, 127, 0, 0, 85, 85, 0, 0, 119, 119, 0, 0, 85, 85, 0, 0,
};
static const uint8_t has_tr_8x8[32] = {
  255, 255, 85, 85, 119, 119, 85, 85, 127, 127, 85, 85, 119, 119, 85, 85,
  255, 127, 85, 85, 119, 119, 85, 85, 127, 127, 85, 85, 119, 119, 85, 85,
};
static const uint8_t has_tr_8x16[16] = {
  255, 255, 119, 119, 127, 127, 119, 119,
  255, 127, 119, 119, 127, 127, 119, 119,
};
static const uint8_t has_tr_16x8[16] = {
  255, 0, 85, 0, 119, 0, 85, 0, 127, 0, 85, 0, 119, 0, 85, 0,
};
static const uint8_t has_tr_16x16[8] = {
  255, 85, 119, 85, 127, 85, 119, 85,
};
static const uint8_t has_tr_16x32[4] = { 255, 119, 127, 119 };
static const uint8_t has_tr_32x16[4] = { 15, 5, 7, 5 };
static const uint8_t has_tr_32x32[2] = { 95, 87 };
static const uint8_t has_tr_32x64[1] = { 127 };
static const uint8_t has_tr_64x32[1] = { 19 };
static const uint8_t has_tr_64x64[1] = { 7 };
static const uint8_t has_tr_64x128[1] = { 3 };
static const uint8_t has_tr_128x64[1] = { 1 };
static const uint8_t has_tr_128x128[1] = { 1 };
static const uint8_t has_tr_4x16[32] = {
  255, 255, 255, 255, 127, 127, 127, 127, 255, 127, 255,
  127, 127, 127, 127, 127, 255, 255, 255, 127, 127, 127,
  127, 127, 255, 127, 255, 127, 127, 127, 127, 127,
};
static const uint8_t has_tr_16x4[32] = {
  255, 0, 0, 0, 85, 0, 0, 0, 119, 0, 0, 0, 85, 0, 0, 0,
  127, 0, 0, 0, 85, 0, 0, 0, 119, 0, 0, 0, 85, 0, 0, 0,
};
static const uint8_t has_tr_8x32[8] = {
  255, 255, 127, 127, 255, 127, 127, 127,
};
static const uint8_t has_tr_32x8[8] = {
  15, 0, 5, 0, 7, 0, 5, 0,
};
static const uint8_t has_tr_16x64[2] = { 255, 127 };
static const uint8_t has_tr_64x16[2] = { 3, 1 };
127 
128 static const uint8_t *const has_tr_tables[BLOCK_SIZES_ALL] = {
129   // 4X4
130   has_tr_4x4,
131   // 4X8,       8X4,            8X8
132   has_tr_4x8, has_tr_8x4, has_tr_8x8,
133   // 8X16,      16X8,           16X16
134   has_tr_8x16, has_tr_16x8, has_tr_16x16,
135   // 16X32,     32X16,          32X32
136   has_tr_16x32, has_tr_32x16, has_tr_32x32,
137   // 32X64,     64X32,          64X64
138   has_tr_32x64, has_tr_64x32, has_tr_64x64,
139   // 64x128,    128x64,         128x128
140   has_tr_64x128, has_tr_128x64, has_tr_128x128,
141   // 4x16,      16x4,            8x32
142   has_tr_4x16, has_tr_16x4, has_tr_8x32,
143   // 32x8,      16x64,           64x16
144   has_tr_32x8, has_tr_16x64, has_tr_64x16
145 };
146 
// Top-right availability bitmaps for square blocks visited in the vertical
// (PARTITION_VERT_A / PARTITION_VERT_B) order. Same bit packing as the
// has_tr_* tables. Declared const because they are read-only lookup data
// reached only through const-qualified pointers.
static const uint8_t has_tr_vert_8x8[32] = {
  255, 255, 0, 0, 119, 119, 0, 0, 127, 127, 0, 0, 119, 119, 0, 0,
  255, 127, 0, 0, 119, 119, 0, 0, 127, 127, 0, 0, 119, 119, 0, 0,
};
static const uint8_t has_tr_vert_16x16[8] = {
  255, 0, 119, 0, 127, 0, 119, 0,
};
static const uint8_t has_tr_vert_32x32[2] = { 15, 7 };
static const uint8_t has_tr_vert_64x64[1] = { 3 };
156 
157 // The _vert_* tables are like the ordinary tables above, but describe the
158 // order we visit square blocks when doing a PARTITION_VERT_A or
159 // PARTITION_VERT_B. This is the same order as normal except for on the last
160 // split where we go vertically (TL, BL, TR, BR). We treat the rectangular block
161 // as a pair of squares, which means that these tables work correctly for both
162 // mixed vertical partition types.
163 //
164 // There are tables for each of the square sizes. Vertical rectangles (like
165 // BLOCK_16X32) use their respective "non-vert" table
166 static const uint8_t *const has_tr_vert_tables[BLOCK_SIZES] = {
167   // 4X4
168   NULL,
169   // 4X8,      8X4,         8X8
170   has_tr_4x8, NULL, has_tr_vert_8x8,
171   // 8X16,     16X8,        16X16
172   has_tr_8x16, NULL, has_tr_vert_16x16,
173   // 16X32,    32X16,       32X32
174   has_tr_16x32, NULL, has_tr_vert_32x32,
175   // 32X64,    64X32,       64X64
176   has_tr_32x64, NULL, has_tr_vert_64x64,
177   // 64x128,   128x64,      128x128
178   has_tr_64x128, NULL, has_tr_128x128
179 };
180 
// Returns the packed top-right availability bitmap for `bsize`.
//
// PARTITION_VERT_A and PARTITION_VERT_B select from has_tr_vert_tables, which
// only has BLOCK_SIZES entries (asserted); every other partition type selects
// from has_tr_tables. The selected table must be non-NULL (asserted).
static const uint8_t *get_has_tr_table(PARTITION_TYPE partition,
                                       BLOCK_SIZE bsize) {
  const int is_mixed_vert =
      partition == PARTITION_VERT_A || partition == PARTITION_VERT_B;
  // Mixed vertical partitions are looked up in has_tr_vert_tables, which is
  // shorter than has_tr_tables.
  assert(!is_mixed_vert || bsize < BLOCK_SIZES);
  const uint8_t *const table =
      is_mixed_vert ? has_tr_vert_tables[bsize] : has_tr_tables[bsize];
  assert(table);
  return table;
}
194 
// Reports whether the top-right reference pixels of a transform block are
// available (already coded).
//
// bsize/mi_row/mi_col locate the coding block, txsz is the transform size,
// row_off/col_off are the transform block's offsets inside the coding block
// (in plane units of 4 samples), and ss_x/ss_y are the plane's subsampling
// shifts. Returns 1 when available, 0 otherwise.
static int has_top_right(const AV1_COMMON *cm, BLOCK_SIZE bsize, int mi_row,
                         int mi_col, int top_available, int right_available,
                         PARTITION_TYPE partition, TX_SIZE txsz, int row_off,
                         int col_off, int ss_x, int ss_y) {
  if (!(top_available && right_available)) return 0;

  const int tx_w_unit = tx_size_wide_unit[txsz];
  const int plane_bw_unit = AOMMAX(mi_size_wide[bsize] >> ss_x, 1);

  if (row_off > 0) {
    // Not on the block's top row: only the pixels to the right inside the
    // block matter.
    if (block_size_wide[bsize] <= block_size_wide[BLOCK_64X64]) {
      return col_off + tx_w_unit < plane_bw_unit;
    }

    // Blocks wider than 64 are handled per 64-wide unit.
    const int plane_bw_unit_64 = mi_size_wide[BLOCK_64X64] >> ss_x;
    const int plane_bh_unit_64 = mi_size_high[BLOCK_64X64] >> ss_y;

    // Special case: the transform unit whose top-right corner sits at the
    // centre of a 128x128 block does have top-right pixels available.
    if (row_off == plane_bh_unit_64 &&
        col_off + tx_w_unit == plane_bw_unit_64) {
      return 1;
    }
    return (col_off % plane_bw_unit_64) + tx_w_unit < plane_bw_unit_64;
  }

  // On the block's top row. If the needed pixels lie entirely in the block
  // directly above, they are already available.
  if (col_off + tx_w_unit < plane_bw_unit) return 1;

  const int bw_log2 = mi_size_wide_log2[bsize];
  const int bh_log2 = mi_size_high_log2[bsize];
  const int sb_mi_size = mi_size_high[cm->seq_params.sb_size];
  const int sb_mask = sb_mi_size - 1;
  const int row_in_sb = (mi_row & sb_mask) >> bh_log2;
  const int col_in_sb = (mi_col & sb_mask) >> bw_log2;

  // Top row of the superblock: the pixels come from the above and/or
  // above-right superblocks, which are both already coded.
  if (row_in_sb == 0) return 1;

  // Rightmost column of the superblock (not the top row): the pixels would
  // come from the superblock to the right, which is not coded yet.
  if (((col_in_sb + 1) << bw_log2) >= sb_mi_size) return 0;

  // General case: look up whether the top-right block is coded before the
  // current one.
  const int blk_index =
      (row_in_sb << (MAX_MIB_SIZE_LOG2 - bw_log2)) + col_in_sb;
  const uint8_t *const table = get_has_tr_table(partition, bsize);
  return (table[blk_index / 8] >> (blk_index % 8)) & 1;
}
250 
251 // Similar to the has_tr_* tables, but store if the bottom-left reference
252 // pixels are available.
// Packed bottom-left availability bitmaps, one array per block size. Bit
// (i % 8) of byte (i / 8) is the flag for the block with raster index i.
// These are read-only lookup data, so they are declared const: they are only
// reached through the const-qualified pointers stored in has_bl_tables /
// has_bl_vert_tables.
static const uint8_t has_bl_4x4[128] = {
  84, 85, 85, 85, 16, 17, 17, 17, 84, 85, 85, 85, 0,  1,  1,  1,  84, 85, 85,
  85, 16, 17, 17, 17, 84, 85, 85, 85, 0,  0,  1,  0,  84, 85, 85, 85, 16, 17,
  17, 17, 84, 85, 85, 85, 0,  1,  1,  1,  84, 85, 85, 85, 16, 17, 17, 17, 84,
  85, 85, 85, 0,  0,  0,  0,  84, 85, 85, 85, 16, 17, 17, 17, 84, 85, 85, 85,
  0,  1,  1,  1,  84, 85, 85, 85, 16, 17, 17, 17, 84, 85, 85, 85, 0,  0,  1,
  0,  84, 85, 85, 85, 16, 17, 17, 17, 84, 85, 85, 85, 0,  1,  1,  1,  84, 85,
  85, 85, 16, 17, 17, 17, 84, 85, 85, 85, 0,  0,  0,  0,
};
static const uint8_t has_bl_4x8[64] = {
  16, 17, 17, 17, 0, 1, 1, 1, 16, 17, 17, 17, 0, 0, 1, 0,
  16, 17, 17, 17, 0, 1, 1, 1, 16, 17, 17, 17, 0, 0, 0, 0,
  16, 17, 17, 17, 0, 1, 1, 1, 16, 17, 17, 17, 0, 0, 1, 0,
  16, 17, 17, 17, 0, 1, 1, 1, 16, 17, 17, 17, 0, 0, 0, 0,
};
static const uint8_t has_bl_8x4[64] = {
  254, 255, 84, 85, 254, 255, 16, 17, 254, 255, 84, 85, 254, 255, 0, 1,
  254, 255, 84, 85, 254, 255, 16, 17, 254, 255, 84, 85, 254, 255, 0, 0,
  254, 255, 84, 85, 254, 255, 16, 17, 254, 255, 84, 85, 254, 255, 0, 1,
  254, 255, 84, 85, 254, 255, 16, 17, 254, 255, 84, 85, 254, 255, 0, 0,
};
static const uint8_t has_bl_8x8[32] = {
  84, 85, 16, 17, 84, 85, 0, 1, 84, 85, 16, 17, 84, 85, 0, 0,
  84, 85, 16, 17, 84, 85, 0, 1, 84, 85, 16, 17, 84, 85, 0, 0,
};
static const uint8_t has_bl_8x16[16] = {
  16, 17, 0, 1, 16, 17, 0, 0, 16, 17, 0, 1, 16, 17, 0, 0,
};
static const uint8_t has_bl_16x8[16] = {
  254, 84, 254, 16, 254, 84, 254, 0, 254, 84, 254, 16, 254, 84, 254, 0,
};
static const uint8_t has_bl_16x16[8] = {
  84, 16, 84, 0, 84, 16, 84, 0,
};
static const uint8_t has_bl_16x32[4] = { 16, 0, 16, 0 };
static const uint8_t has_bl_32x16[4] = { 78, 14, 78, 14 };
static const uint8_t has_bl_32x32[2] = { 4, 4 };
static const uint8_t has_bl_32x64[1] = { 0 };
static const uint8_t has_bl_64x32[1] = { 34 };
static const uint8_t has_bl_64x64[1] = { 0 };
static const uint8_t has_bl_64x128[1] = { 0 };
static const uint8_t has_bl_128x64[1] = { 0 };
static const uint8_t has_bl_128x128[1] = { 0 };
static const uint8_t has_bl_4x16[32] = {
  0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0,
  0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0,
};
static const uint8_t has_bl_16x4[32] = {
  254, 254, 254, 84, 254, 254, 254, 16, 254, 254, 254, 84, 254, 254, 254, 0,
  254, 254, 254, 84, 254, 254, 254, 16, 254, 254, 254, 84, 254, 254, 254, 0,
};
static const uint8_t has_bl_8x32[8] = {
  0, 1, 0, 0, 0, 1, 0, 0,
};
static const uint8_t has_bl_32x8[8] = {
  238, 78, 238, 14, 238, 78, 238, 14,
};
static const uint8_t has_bl_16x64[2] = { 0, 0 };
static const uint8_t has_bl_64x16[2] = { 42, 42 };
312 
313 static const uint8_t *const has_bl_tables[BLOCK_SIZES_ALL] = {
314   // 4X4
315   has_bl_4x4,
316   // 4X8,         8X4,         8X8
317   has_bl_4x8, has_bl_8x4, has_bl_8x8,
318   // 8X16,        16X8,        16X16
319   has_bl_8x16, has_bl_16x8, has_bl_16x16,
320   // 16X32,       32X16,       32X32
321   has_bl_16x32, has_bl_32x16, has_bl_32x32,
322   // 32X64,       64X32,       64X64
323   has_bl_32x64, has_bl_64x32, has_bl_64x64,
324   // 64x128,      128x64,      128x128
325   has_bl_64x128, has_bl_128x64, has_bl_128x128,
326   // 4x16,        16x4,        8x32
327   has_bl_4x16, has_bl_16x4, has_bl_8x32,
328   // 32x8,        16x64,       64x16
329   has_bl_32x8, has_bl_16x64, has_bl_64x16
330 };
331 
// Bottom-left availability bitmaps for square blocks visited in the vertical
// (PARTITION_VERT_A / PARTITION_VERT_B) order. Same bit packing as the
// has_bl_* tables. Declared const because they are read-only lookup data
// reached only through const-qualified pointers.
static const uint8_t has_bl_vert_8x8[32] = {
  254, 255, 16, 17, 254, 255, 0, 1, 254, 255, 16, 17, 254, 255, 0, 0,
  254, 255, 16, 17, 254, 255, 0, 1, 254, 255, 16, 17, 254, 255, 0, 0,
};
static const uint8_t has_bl_vert_16x16[8] = {
  254, 16, 254, 0, 254, 16, 254, 0,
};
static const uint8_t has_bl_vert_32x32[2] = { 14, 14 };
static const uint8_t has_bl_vert_64x64[1] = { 2 };
341 
342 // The _vert_* tables are like the ordinary tables above, but describe the
343 // order we visit square blocks when doing a PARTITION_VERT_A or
344 // PARTITION_VERT_B. This is the same order as normal except for on the last
345 // split where we go vertically (TL, BL, TR, BR). We treat the rectangular block
346 // as a pair of squares, which means that these tables work correctly for both
347 // mixed vertical partition types.
348 //
349 // There are tables for each of the square sizes. Vertical rectangles (like
350 // BLOCK_16X32) use their respective "non-vert" table
351 static const uint8_t *const has_bl_vert_tables[BLOCK_SIZES] = {
352   // 4X4
353   NULL,
354   // 4X8,     8X4,         8X8
355   has_bl_4x8, NULL, has_bl_vert_8x8,
356   // 8X16,    16X8,        16X16
357   has_bl_8x16, NULL, has_bl_vert_16x16,
358   // 16X32,   32X16,       32X32
359   has_bl_16x32, NULL, has_bl_vert_32x32,
360   // 32X64,   64X32,       64X64
361   has_bl_32x64, NULL, has_bl_vert_64x64,
362   // 64x128,  128x64,      128x128
363   has_bl_64x128, NULL, has_bl_128x128
364 };
365 
// Returns the packed bottom-left availability bitmap for `bsize`.
//
// PARTITION_VERT_A and PARTITION_VERT_B select from has_bl_vert_tables, which
// only has BLOCK_SIZES entries (asserted); every other partition type selects
// from has_bl_tables. The selected table must be non-NULL (asserted).
static const uint8_t *get_has_bl_table(PARTITION_TYPE partition,
                                       BLOCK_SIZE bsize) {
  const int is_mixed_vert =
      partition == PARTITION_VERT_A || partition == PARTITION_VERT_B;
  // Mixed vertical partitions are looked up in has_bl_vert_tables, which is
  // shorter than has_bl_tables.
  assert(!is_mixed_vert || bsize < BLOCK_SIZES);
  const uint8_t *const table =
      is_mixed_vert ? has_bl_vert_tables[bsize] : has_bl_tables[bsize];
  assert(table);
  return table;
}
379 
// Reports whether the bottom-left reference pixels of a transform block are
// available (already coded).
//
// bsize/mi_row/mi_col locate the coding block, txsz is the transform size,
// row_off/col_off are the transform block's offsets inside the coding block
// (in plane units of 4 samples), and ss_x/ss_y are the plane's subsampling
// shifts. Returns 1 when available, 0 otherwise.
static int has_bottom_left(const AV1_COMMON *cm, BLOCK_SIZE bsize, int mi_row,
                           int mi_col, int bottom_available, int left_available,
                           PARTITION_TYPE partition, TX_SIZE txsz, int row_off,
                           int col_off, int ss_x, int ss_y) {
  if (!(bottom_available && left_available)) return 0;

  if (col_off > 0) {
    // Special case for 128x* blocks, which are coded as 64x* units in raster
    // order: a transform block on the left edge of a right-hand 64x* unit may
    // find its bottom-left pixels in the already coded left-hand unit.
    if (block_size_wide[bsize] > block_size_wide[BLOCK_64X64]) {
      const int plane_bw_unit_64 = mi_size_wide[BLOCK_64X64] >> ss_x;
      if (col_off % plane_bw_unit_64 == 0) {
        const int plane_bh_unit_64 = mi_size_high[BLOCK_64X64] >> ss_y;
        const int unit_rows =
            AOMMIN(mi_size_high[bsize] >> ss_y, plane_bh_unit_64);
        // Available only if every needed pixel stays inside the left unit.
        return (row_off % plane_bh_unit_64) + tx_size_high_unit[txsz] <
               unit_rows;
      }
    }
    // Otherwise the pixels belong to the bottom-left block: not available.
    return 0;
  }

  const int tx_h_unit = tx_size_high_unit[txsz];
  const int plane_bh_unit = AOMMAX(mi_size_high[bsize] >> ss_y, 1);

  // Needed pixels lie entirely in the block to the left: already available.
  if (row_off + tx_h_unit < plane_bh_unit) return 1;

  const int bw_log2 = mi_size_wide_log2[bsize];
  const int bh_log2 = mi_size_high_log2[bsize];
  const int sb_mi_size = mi_size_high[cm->seq_params.sb_size];
  const int sb_mask = sb_mi_size - 1;
  const int row_in_sb = (mi_row & sb_mask) >> bh_log2;
  const int col_in_sb = (mi_col & sb_mask) >> bw_log2;

  // Leftmost column of the superblock: pixels may come from the left and/or
  // bottom-left superblocks, and only the left one is coded, so check that
  // everything needed falls inside the left superblock.
  if (col_in_sb == 0) {
    const int blk_start_row_off = (row_in_sb << bh_log2) >> ss_y;
    const int sb_height_unit = sb_mi_size >> ss_y;
    return blk_start_row_off + row_off + tx_h_unit < sb_height_unit;
  }

  // Bottom row of the superblock (not the leftmost column): the pixels would
  // come from the superblock below, which is not coded yet.
  if (((row_in_sb + 1) << bh_log2) >= sb_mi_size) return 0;

  // General case: look up whether the bottom-left block is coded before the
  // current one.
  const int blk_index =
      (row_in_sb << (MAX_MIB_SIZE_LOG2 - bw_log2)) + col_in_sb;
  const uint8_t *const table = get_has_bl_table(partition, bsize);
  return (table[blk_index / 8] >> (blk_index % 8)) & 1;
}
448 
// Signature shared by the 8-bit intra predictors: writes a predicted block to
// `dst` (row pitch `stride`) from the `above` row and `left` column.
typedef void (*intra_pred_fn)(uint8_t *dst, ptrdiff_t stride,
                              const uint8_t *above, const uint8_t *left);

// 8-bit predictor dispatch tables, filled by init_intra_predictors_internal().
// pred is indexed [prediction mode][transform size].
static intra_pred_fn pred[INTRA_MODES][TX_SIZES_ALL];
// dc_pred is indexed [a][b][transform size]; the initializer stores
// dc_128 at [0][0], dc_top at [0][1], dc_left at [1][0] and dc at [1][1].
static intra_pred_fn dc_pred[2][2][TX_SIZES_ALL];

#if CONFIG_AV1_HIGHBITDEPTH
// High bit-depth counterpart of intra_pred_fn: 16-bit samples plus the bit
// depth `bd`.
typedef void (*intra_high_pred_fn)(uint16_t *dst, ptrdiff_t stride,
                                   const uint16_t *above, const uint16_t *left,
                                   int bd);
// High bit-depth dispatch tables, laid out like pred / dc_pred.
static intra_high_pred_fn pred_high[INTRA_MODES][TX_SIZES_ALL];
static intra_high_pred_fn dc_pred_high[2][2][TX_SIZES_ALL];
#endif
462 
init_intra_predictors_internal(void)463 static void init_intra_predictors_internal(void) {
464   assert(NELEMENTS(mode_to_angle_map) == INTRA_MODES);
465 
466 #define INIT_RECTANGULAR(p, type)             \
467   p[TX_4X8] = aom_##type##_predictor_4x8;     \
468   p[TX_8X4] = aom_##type##_predictor_8x4;     \
469   p[TX_8X16] = aom_##type##_predictor_8x16;   \
470   p[TX_16X8] = aom_##type##_predictor_16x8;   \
471   p[TX_16X32] = aom_##type##_predictor_16x32; \
472   p[TX_32X16] = aom_##type##_predictor_32x16; \
473   p[TX_32X64] = aom_##type##_predictor_32x64; \
474   p[TX_64X32] = aom_##type##_predictor_64x32; \
475   p[TX_4X16] = aom_##type##_predictor_4x16;   \
476   p[TX_16X4] = aom_##type##_predictor_16x4;   \
477   p[TX_8X32] = aom_##type##_predictor_8x32;   \
478   p[TX_32X8] = aom_##type##_predictor_32x8;   \
479   p[TX_16X64] = aom_##type##_predictor_16x64; \
480   p[TX_64X16] = aom_##type##_predictor_64x16;
481 
482 #define INIT_NO_4X4(p, type)                  \
483   p[TX_8X8] = aom_##type##_predictor_8x8;     \
484   p[TX_16X16] = aom_##type##_predictor_16x16; \
485   p[TX_32X32] = aom_##type##_predictor_32x32; \
486   p[TX_64X64] = aom_##type##_predictor_64x64; \
487   INIT_RECTANGULAR(p, type)
488 
489 #define INIT_ALL_SIZES(p, type)           \
490   p[TX_4X4] = aom_##type##_predictor_4x4; \
491   INIT_NO_4X4(p, type)
492 
493   INIT_ALL_SIZES(pred[V_PRED], v);
494   INIT_ALL_SIZES(pred[H_PRED], h);
495   INIT_ALL_SIZES(pred[PAETH_PRED], paeth);
496   INIT_ALL_SIZES(pred[SMOOTH_PRED], smooth);
497   INIT_ALL_SIZES(pred[SMOOTH_V_PRED], smooth_v);
498   INIT_ALL_SIZES(pred[SMOOTH_H_PRED], smooth_h);
499   INIT_ALL_SIZES(dc_pred[0][0], dc_128);
500   INIT_ALL_SIZES(dc_pred[0][1], dc_top);
501   INIT_ALL_SIZES(dc_pred[1][0], dc_left);
502   INIT_ALL_SIZES(dc_pred[1][1], dc);
503 #if CONFIG_AV1_HIGHBITDEPTH
504   INIT_ALL_SIZES(pred_high[V_PRED], highbd_v);
505   INIT_ALL_SIZES(pred_high[H_PRED], highbd_h);
506   INIT_ALL_SIZES(pred_high[PAETH_PRED], highbd_paeth);
507   INIT_ALL_SIZES(pred_high[SMOOTH_PRED], highbd_smooth);
508   INIT_ALL_SIZES(pred_high[SMOOTH_V_PRED], highbd_smooth_v);
509   INIT_ALL_SIZES(pred_high[SMOOTH_H_PRED], highbd_smooth_h);
510   INIT_ALL_SIZES(dc_pred_high[0][0], highbd_dc_128);
511   INIT_ALL_SIZES(dc_pred_high[0][1], highbd_dc_top);
512   INIT_ALL_SIZES(dc_pred_high[1][0], highbd_dc_left);
513   INIT_ALL_SIZES(dc_pred_high[1][1], highbd_dc);
514 #endif
515 #undef intra_pred_allsizes
516 }
517 
// Directional prediction, zone 1: 0 < angle < 90.
//
// Predicts a bw x bh block into `dst` using only the `above` reference row
// (`left` and `dy` are unused; dy must be 1). Each row steps the reference
// position by `dx` (1/64-sample units) and blends the two neighbouring
// samples with 5-bit weights. Positions at or beyond max_base_x are filled
// with above[max_base_x]. `upsample_above` is used as a shift count that
// doubles the reference resolution when set.
void av1_dr_prediction_z1_c(uint8_t *dst, ptrdiff_t stride, int bw, int bh,
                            const uint8_t *above, const uint8_t *left,
                            int upsample_above, int dx, int dy) {
  (void)left;
  (void)dy;
  assert(dy == 1);
  assert(dx > 0);

  const int max_base_x = ((bw + bh) - 1) << upsample_above;
  const int frac_bits = 6 - upsample_above;
  const int base_inc = 1 << upsample_above;

  uint8_t *row = dst;
  int x = dx;
  for (int r = 0; r < bh; ++r) {
    int base = x >> frac_bits;
    const int shift = ((x << upsample_above) & 0x3F) >> 1;

    if (base >= max_base_x) {
      // This row and all rows below it lie past the reference edge: flood
      // them with the last reference sample and stop.
      for (int i = r; i < bh; ++i) {
        memset(row, above[max_base_x], bw * sizeof(row[0]));
        row += stride;
      }
      return;
    }

    for (int c = 0; c < bw; ++c) {
      if (base < max_base_x) {
        const int val = above[base] * (32 - shift) + above[base + 1] * shift;
        row[c] = ROUND_POWER_OF_TWO(val, 5);
      } else {
        row[c] = above[max_base_x];
      }
      base += base_inc;
    }

    row += stride;
    x += dx;
  }
}
555 
// Directional prediction, zone 2: 90 < angle < 180.
//
// Predicts a bw x bh block into `dst`. For each pixel the projection onto the
// `above` row is tried first; if it falls left of min_base_x the pixel is
// projected onto the `left` column instead. Either way the two neighbouring
// reference samples are blended with 5-bit weights. dx and dy are the
// per-row / per-column steps in 1/64-sample units and must be positive.
void av1_dr_prediction_z2_c(uint8_t *dst, ptrdiff_t stride, int bw, int bh,
                            const uint8_t *above, const uint8_t *left,
                            int upsample_above, int upsample_left, int dx,
                            int dy) {
  assert(dx > 0);
  assert(dy > 0);

  const int min_base_x = -(1 << upsample_above);
  const int min_base_y = -(1 << upsample_left);
  (void)min_base_y;  // Only referenced by the assert below.
  const int frac_bits_x = 6 - upsample_above;
  const int frac_bits_y = 6 - upsample_left;

  uint8_t *row = dst;
  for (int r = 0; r < bh; ++r, row += stride) {
    const int row_step = (r + 1) * dx;
    for (int c = 0; c < bw; ++c) {
      const int x = (c << 6) - row_step;
      const int base_x = x >> frac_bits_x;
      int val;
      if (base_x < min_base_x) {
        // Projection misses the above row: use the left column.
        const int y = (r << 6) - (c + 1) * dy;
        const int base_y = y >> frac_bits_y;
        assert(base_y >= min_base_y);
        const int shift = ((y * (1 << upsample_left)) & 0x3F) >> 1;
        val = left[base_y] * (32 - shift) + left[base_y + 1] * shift;
      } else {
        const int shift = ((x * (1 << upsample_above)) & 0x3F) >> 1;
        val = above[base_x] * (32 - shift) + above[base_x + 1] * shift;
      }
      row[c] = ROUND_POWER_OF_TWO(val, 5);
    }
  }
}
594 
// Directional prediction, zone 3: 180 < angle < 270.
//
// Predicts a bw x bh block into `dst` using only the `left` reference column
// (`above` and `dx` are unused; dx must be 1). Each column steps the
// reference position by `dy` (1/64-sample units) and blends the two
// neighbouring samples with 5-bit weights. Once the position reaches
// max_base_y the rest of the column is filled with left[max_base_y].
void av1_dr_prediction_z3_c(uint8_t *dst, ptrdiff_t stride, int bw, int bh,
                            const uint8_t *above, const uint8_t *left,
                            int upsample_left, int dx, int dy) {
  (void)above;
  (void)dx;

  assert(dx == 1);
  assert(dy > 0);

  const int max_base_y = (bw + bh - 1) << upsample_left;
  const int frac_bits = 6 - upsample_left;
  const int base_inc = 1 << upsample_left;

  int y = dy;
  for (int c = 0; c < bw; ++c, y += dy) {
    int base = y >> frac_bits;
    const int shift = ((y << upsample_left) & 0x3F) >> 1;

    // Interpolated part of the column.
    int r = 0;
    for (; r < bh && base < max_base_y; ++r, base += base_inc) {
      const int val = left[base] * (32 - shift) + left[base + 1] * shift;
      dst[r * stride + c] = ROUND_POWER_OF_TWO(val, 5);
    }
    // Remainder of the column lies past the reference edge.
    for (; r < bh; ++r) dst[r * stride + c] = left[max_base_y];
  }
}
626 
// Dispatches an 8-bit directional prediction by angle (degrees, expected in
// the open range (0, 270)): 90 and 180 use the V_PRED / H_PRED entries of
// `pred`, and the three open ranges in between use the zone 1/2/3 kernels.
// Angles outside (0, 270) trip the assert and otherwise write nothing.
static void dr_predictor(uint8_t *dst, ptrdiff_t stride, TX_SIZE tx_size,
                         const uint8_t *above, const uint8_t *left,
                         int upsample_above, int upsample_left, int angle) {
  const int dx = av1_get_dx(angle);
  const int dy = av1_get_dy(angle);
  const int bw = tx_size_wide[tx_size];
  const int bh = tx_size_high[tx_size];
  assert(angle > 0 && angle < 270);

  // Exact vertical / horizontal angles.
  if (angle == 90) {
    pred[V_PRED][tx_size](dst, stride, above, left);
    return;
  }
  if (angle == 180) {
    pred[H_PRED][tx_size](dst, stride, above, left);
    return;
  }

  if (angle > 0 && angle < 90) {
    av1_dr_prediction_z1(dst, stride, bw, bh, above, left, upsample_above, dx,
                         dy);
  } else if (angle > 90 && angle < 180) {
    av1_dr_prediction_z2(dst, stride, bw, bh, above, left, upsample_above,
                         upsample_left, dx, dy);
  } else if (angle > 180 && angle < 270) {
    av1_dr_prediction_z3(dst, stride, bw, bh, above, left, upsample_left, dx,
                         dy);
  }
}
651 
652 #if CONFIG_AV1_HIGHBITDEPTH
// High bit-depth directional prediction, zone 1: 0 < angle < 90.
//
// 16-bit counterpart of av1_dr_prediction_z1_c: predicts a bw x bh block from
// the `above` reference row only (`left`, `dy` and `bd` are unused; dy must
// be 1). Positions at or beyond max_base_x are filled with above[max_base_x].
void av1_highbd_dr_prediction_z1_c(uint16_t *dst, ptrdiff_t stride, int bw,
                                   int bh, const uint16_t *above,
                                   const uint16_t *left, int upsample_above,
                                   int dx, int dy, int bd) {
  (void)left;
  (void)dy;
  (void)bd;
  assert(dy == 1);
  assert(dx > 0);

  const int max_base_x = ((bw + bh) - 1) << upsample_above;
  const int frac_bits = 6 - upsample_above;
  const int base_inc = 1 << upsample_above;

  uint16_t *row = dst;
  int x = dx;
  for (int r = 0; r < bh; ++r) {
    int base = x >> frac_bits;
    const int shift = ((x << upsample_above) & 0x3F) >> 1;

    if (base >= max_base_x) {
      // This row and all rows below it lie past the reference edge: flood
      // them with the last reference sample and stop.
      for (int i = r; i < bh; ++i) {
        aom_memset16(row, above[max_base_x], bw);
        row += stride;
      }
      return;
    }

    for (int c = 0; c < bw; ++c) {
      if (base < max_base_x) {
        const int val = above[base] * (32 - shift) + above[base + 1] * shift;
        row[c] = ROUND_POWER_OF_TWO(val, 5);
      } else {
        row[c] = above[max_base_x];
      }
      base += base_inc;
    }

    row += stride;
    x += dx;
  }
}
692 
// High bit-depth directional prediction, zone 2: 90 < angle < 180.
//
// 16-bit counterpart of av1_dr_prediction_z2_c (`bd` is unused). Each pixel
// is projected onto the `above` row when that projection is at or right of
// min_base_x, otherwise onto the `left` column; the two neighbouring
// reference samples are blended with 5-bit weights.
void av1_highbd_dr_prediction_z2_c(uint16_t *dst, ptrdiff_t stride, int bw,
                                   int bh, const uint16_t *above,
                                   const uint16_t *left, int upsample_above,
                                   int upsample_left, int dx, int dy, int bd) {
  (void)bd;
  assert(dx > 0);
  assert(dy > 0);

  const int min_base_x = -(1 << upsample_above);
  const int min_base_y = -(1 << upsample_left);
  (void)min_base_y;  // Only referenced by the assert below.
  const int frac_bits_x = 6 - upsample_above;
  const int frac_bits_y = 6 - upsample_left;

  uint16_t *row = dst;
  for (int r = 0; r < bh; ++r, row += stride) {
    const int row_step = (r + 1) * dx;
    for (int c = 0; c < bw; ++c) {
      const int x = (c << 6) - row_step;
      const int base_x = x >> frac_bits_x;
      int val;
      if (base_x < min_base_x) {
        // Projection misses the above row: use the left column.
        const int y = (r << 6) - (c + 1) * dy;
        const int base_y = y >> frac_bits_y;
        assert(base_y >= min_base_y);
        const int shift = ((y * (1 << upsample_left)) & 0x3F) >> 1;
        val = left[base_y] * (32 - shift) + left[base_y + 1] * shift;
      } else {
        const int shift = ((x * (1 << upsample_above)) & 0x3F) >> 1;
        val = above[base_x] * (32 - shift) + above[base_x + 1] * shift;
      }
      row[c] = ROUND_POWER_OF_TWO(val, 5);
    }
  }
}
732 
// Directional prediction (high bitdepth), zone 3: 180 < angle < 270.
//
// Only the left edge is used. Each output column starts dy further along the
// left edge (1/64 sample units) than the previous one; within a column the
// edge index advances by one sample (two when the edge is upsampled) per row.
// Samples whose index reaches the end of the edge are set to the last edge
// sample. above, dx and bd are unused.
void av1_highbd_dr_prediction_z3_c(uint16_t *dst, ptrdiff_t stride, int bw,
                                   int bh, const uint16_t *above,
                                   const uint16_t *left, int upsample_left,
                                   int dx, int dy, int bd) {
  (void)above;
  (void)dx;
  (void)bd;
  assert(dx == 1);
  assert(dy > 0);

  const int idx_step = 1 << upsample_left;
  const int frac_bits = 6 - upsample_left;
  const int last_idx = (bw + bh - 1) << upsample_left;

  for (int col = 0; col < bw; ++col) {
    const int pos = (col + 1) * dy;
    const int weight = ((pos << upsample_left) & 0x3F) >> 1;
    int idx = pos >> frac_bits;
    int row = 0;

    // Interpolate while both blended samples are inside the left edge.
    while (row < bh && idx < last_idx) {
      const int blend = left[idx] * (32 - weight) + left[idx + 1] * weight;
      dst[row * stride + col] = ROUND_POWER_OF_TWO(blend, 5);
      ++row;
      idx += idx_step;
    }

    // The remainder of the column repeats the final edge sample.
    for (; row < bh; ++row) dst[row * stride + col] = left[last_idx];
  }
}
765 
// Dispatches a high-bitdepth directional prediction by angle:
//   0 < angle < 90     zone 1 (av1_highbd_dr_prediction_z1)
//   angle == 90        V_PRED from the pred_high table
//   90 < angle < 180   zone 2 (av1_highbd_dr_prediction_z2)
//   angle == 180       H_PRED from the pred_high table
//   180 < angle < 270  zone 3 (av1_highbd_dr_prediction_z3)
// Block dimensions come from tx_size; dx/dy come from av1_get_dx/av1_get_dy.
static void highbd_dr_predictor(uint16_t *dst, ptrdiff_t stride,
                                TX_SIZE tx_size, const uint16_t *above,
                                const uint16_t *left, int upsample_above,
                                int upsample_left, int angle, int bd) {
  const int dx = av1_get_dx(angle);
  const int dy = av1_get_dy(angle);
  const int bw = tx_size_wide[tx_size];
  const int bh = tx_size_high[tx_size];
  assert(angle > 0 && angle < 270);

  // Exactly vertical / horizontal angles use the plain V / H predictors.
  if (angle == 90) {
    pred_high[V_PRED][tx_size](dst, stride, above, left, bd);
    return;
  }
  if (angle == 180) {
    pred_high[H_PRED][tx_size](dst, stride, above, left, bd);
    return;
  }

  // Angles outside (0, 270) produce no prediction.
  if (angle <= 0 || angle >= 270) return;

  if (angle < 90) {
    av1_highbd_dr_prediction_z1(dst, stride, bw, bh, above, left,
                                upsample_above, dx, dy, bd);
  } else if (angle < 180) {
    av1_highbd_dr_prediction_z2(dst, stride, bw, bh, above, left,
                                upsample_above, upsample_left, dx, dy, bd);
  } else {
    av1_highbd_dr_prediction_z3(dst, stride, bw, bh, above, left, upsample_left,
                                dx, dy, bd);
  }
}
791 #endif  // CONFIG_AV1_HIGHBITDEPTH
792 
// Tap sets for the filter-intra predictors, indexed as
// av1_filter_intra_taps[mode][k][tap]; the array is declared 16-byte aligned.
//
// The predictors in this file process the block in 4x2 patches. k (0..7)
// selects the output sample inside the patch (row k >> 2, column k & 3), and
// taps 0..6 weight the seven neighbors of the patch:
//   tap 0      above-left sample
//   taps 1..4  the four samples directly above the patch
//   taps 5..6  the two samples directly left of the patch
// The eighth entry of every row is 0 and is not read by those predictors.
// Every row sums to 16; the weighted sum is rounded with
// FILTER_INTRA_SCALE_BITS and clipped by the predictors.
DECLARE_ALIGNED(16, const int8_t,
                av1_filter_intra_taps[FILTER_INTRA_MODES][8][8]) = {
  {
      { -6, 10, 0, 0, 0, 12, 0, 0 },
      { -5, 2, 10, 0, 0, 9, 0, 0 },
      { -3, 1, 1, 10, 0, 7, 0, 0 },
      { -3, 1, 1, 2, 10, 5, 0, 0 },
      { -4, 6, 0, 0, 0, 2, 12, 0 },
      { -3, 2, 6, 0, 0, 2, 9, 0 },
      { -3, 2, 2, 6, 0, 2, 7, 0 },
      { -3, 1, 2, 2, 6, 3, 5, 0 },
  },
  {
      { -10, 16, 0, 0, 0, 10, 0, 0 },
      { -6, 0, 16, 0, 0, 6, 0, 0 },
      { -4, 0, 0, 16, 0, 4, 0, 0 },
      { -2, 0, 0, 0, 16, 2, 0, 0 },
      { -10, 16, 0, 0, 0, 0, 10, 0 },
      { -6, 0, 16, 0, 0, 0, 6, 0 },
      { -4, 0, 0, 16, 0, 0, 4, 0 },
      { -2, 0, 0, 0, 16, 0, 2, 0 },
  },
  {
      { -8, 8, 0, 0, 0, 16, 0, 0 },
      { -8, 0, 8, 0, 0, 16, 0, 0 },
      { -8, 0, 0, 8, 0, 16, 0, 0 },
      { -8, 0, 0, 0, 8, 16, 0, 0 },
      { -4, 4, 0, 0, 0, 0, 16, 0 },
      { -4, 0, 4, 0, 0, 0, 16, 0 },
      { -4, 0, 0, 4, 0, 0, 16, 0 },
      { -4, 0, 0, 0, 4, 0, 16, 0 },
  },
  {
      { -2, 8, 0, 0, 0, 10, 0, 0 },
      { -1, 3, 8, 0, 0, 6, 0, 0 },
      { -1, 2, 3, 8, 0, 4, 0, 0 },
      { 0, 1, 2, 3, 8, 2, 0, 0 },
      { -1, 4, 0, 0, 0, 3, 10, 0 },
      { -1, 3, 4, 0, 0, 4, 6, 0 },
      { -1, 2, 3, 4, 0, 4, 4, 0 },
      { -1, 2, 2, 3, 4, 3, 3, 0 },
  },
  {
      { -12, 14, 0, 0, 0, 14, 0, 0 },
      { -10, 0, 14, 0, 0, 12, 0, 0 },
      { -9, 0, 0, 14, 0, 11, 0, 0 },
      { -8, 0, 0, 0, 14, 10, 0, 0 },
      { -10, 12, 0, 0, 0, 0, 14, 0 },
      { -9, 1, 12, 0, 0, 0, 12, 0 },
      { -8, 0, 0, 12, 0, 1, 11, 0 },
      { -7, 0, 0, 1, 12, 1, 9, 0 },
  },
};
846 
// Filter-intra prediction for 8-bit samples.
//
// A (bh + 1) x (bw + 1) scratch grid is seeded with the above row (including
// the above-left sample at above[-1]) and the left column. The block is then
// filled in 4x2 patches, in raster order; each of the eight samples of a patch
// is a 7-tap weighted sum (taps from av1_filter_intra_taps[mode]) of the
// samples above and left of the patch, which may themselves be outputs of
// earlier patches. Finally the bw x bh interior of the grid is copied to dst.
void av1_filter_intra_predictor_c(uint8_t *dst, ptrdiff_t stride,
                                  TX_SIZE tx_size, const uint8_t *above,
                                  const uint8_t *left, int mode) {
  uint8_t buffer[33][33];
  const int bw = tx_size_wide[tx_size];
  const int bh = tx_size_high[tx_size];

  assert(bw <= 32 && bh <= 32);

  // The initialization is just for silencing Jenkins static analysis warnings
  for (int row = 0; row <= bh; ++row)
    memset(buffer[row], 0, (bw + 1) * sizeof(buffer[0][0]));

  // Seed row 0 with above[-1 .. bw-1] and column 0 with left[0 .. bh-1].
  memcpy(buffer[0], &above[-1], (bw + 1) * sizeof(uint8_t));
  for (int row = 0; row < bh; ++row) buffer[row + 1][0] = left[row];

  for (int row = 1; row <= bh; row += 2) {
    for (int col = 1; col <= bw; col += 4) {
      // Capture all seven neighbors before any sample of the patch is written.
      const int neighbors[7] = {
        buffer[row - 1][col - 1], buffer[row - 1][col],
        buffer[row - 1][col + 1], buffer[row - 1][col + 2],
        buffer[row - 1][col + 3], buffer[row][col - 1],
        buffer[row + 1][col - 1],
      };
      for (int k = 0; k < 8; ++k) {
        const int8_t *const taps = av1_filter_intra_taps[mode][k];
        int sum = 0;
        for (int t = 0; t < 7; ++t) sum += taps[t] * neighbors[t];
        buffer[row + (k >> 2)][col + (k & 0x03)] = clip_pixel(
            ROUND_POWER_OF_TWO_SIGNED(sum, FILTER_INTRA_SCALE_BITS));
      }
    }
  }

  for (int row = 0; row < bh; ++row, dst += stride)
    memcpy(dst, &buffer[row + 1][1], bw * sizeof(uint8_t));
}
894 
895 #if CONFIG_AV1_HIGHBITDEPTH
// Filter-intra prediction for high-bitdepth (uint16_t) samples.
//
// Works like the 8-bit predictor: a (bh + 1) x (bw + 1) scratch grid is seeded
// with the above row (starting at above[-1]) and the left column, the block is
// filled in 4x2 patches using the 7-tap sets in av1_filter_intra_taps[mode],
// each result is clipped to bd bits, and the interior is copied to dst.
static void highbd_filter_intra_predictor(uint16_t *dst, ptrdiff_t stride,
                                          TX_SIZE tx_size,
                                          const uint16_t *above,
                                          const uint16_t *left, int mode,
                                          int bd) {
  uint16_t buffer[33][33];
  const int bw = tx_size_wide[tx_size];
  const int bh = tx_size_high[tx_size];

  assert(bw <= 32 && bh <= 32);

  // The initialization is just for silencing Jenkins static analysis warnings
  for (int row = 0; row <= bh; ++row)
    memset(buffer[row], 0, (bw + 1) * sizeof(buffer[0][0]));

  // Seed row 0 with above[-1 .. bw-1] and column 0 with left[0 .. bh-1].
  memcpy(buffer[0], &above[-1], (bw + 1) * sizeof(buffer[0][0]));
  for (int row = 0; row < bh; ++row) buffer[row + 1][0] = left[row];

  for (int row = 1; row <= bh; row += 2) {
    for (int col = 1; col <= bw; col += 4) {
      // Capture all seven neighbors before any sample of the patch is written.
      const int neighbors[7] = {
        buffer[row - 1][col - 1], buffer[row - 1][col],
        buffer[row - 1][col + 1], buffer[row - 1][col + 2],
        buffer[row - 1][col + 3], buffer[row][col - 1],
        buffer[row + 1][col - 1],
      };
      for (int k = 0; k < 8; ++k) {
        const int8_t *const taps = av1_filter_intra_taps[mode][k];
        int sum = 0;
        for (int t = 0; t < 7; ++t) sum += taps[t] * neighbors[t];
        buffer[row + (k >> 2)][col + (k & 0x03)] = clip_pixel_highbd(
            ROUND_POWER_OF_TWO_SIGNED(sum, FILTER_INTRA_SCALE_BITS), bd);
      }
    }
  }

  for (int row = 0; row < bh; ++row, dst += stride)
    memcpy(dst, &buffer[row + 1][1], bw * sizeof(dst[0]));
}
946 #endif  // CONFIG_AV1_HIGHBITDEPTH
947 
is_smooth(const MB_MODE_INFO * mbmi,int plane)948 static int is_smooth(const MB_MODE_INFO *mbmi, int plane) {
949   if (plane == 0) {
950     const PREDICTION_MODE mode = mbmi->mode;
951     return (mode == SMOOTH_PRED || mode == SMOOTH_V_PRED ||
952             mode == SMOOTH_H_PRED);
953   } else {
954     // uv_mode is not set for inter blocks, so need to explicitly
955     // detect that case.
956     if (is_inter_block(mbmi)) return 0;
957 
958     const UV_PREDICTION_MODE uv_mode = mbmi->uv_mode;
959     return (uv_mode == UV_SMOOTH_PRED || uv_mode == UV_SMOOTH_V_PRED ||
960             uv_mode == UV_SMOOTH_H_PRED);
961   }
962 }
963 
get_filt_type(const MACROBLOCKD * xd,int plane)964 static int get_filt_type(const MACROBLOCKD *xd, int plane) {
965   int ab_sm, le_sm;
966 
967   if (plane == 0) {
968     const MB_MODE_INFO *ab = xd->above_mbmi;
969     const MB_MODE_INFO *le = xd->left_mbmi;
970     ab_sm = ab ? is_smooth(ab, plane) : 0;
971     le_sm = le ? is_smooth(le, plane) : 0;
972   } else {
973     const MB_MODE_INFO *ab = xd->chroma_above_mbmi;
974     const MB_MODE_INFO *le = xd->chroma_left_mbmi;
975     ab_sm = ab ? is_smooth(ab, plane) : 0;
976     le_sm = le ? is_smooth(le, plane) : 0;
977   }
978 
979   return (ab_sm || le_sm) ? 1 : 0;
980 }
981 
// Picks the intra edge filter strength (0 = no filtering, up to 3).
//
//  bs0, bs1  the two block dimensions; only their sum is used.
//  delta     angle difference; only its magnitude is used.
//  type      0 selects the first threshold table, any other value the second.
//
// Larger blocks and larger angle differences yield stronger filtering.
static int intra_edge_filter_strength(int bs0, int bs1, int delta, int type) {
  const int size_sum = bs0 + bs1;
  const int mag = abs(delta);

  if (type == 0) {
    if (size_sum <= 8) return mag >= 56 ? 1 : 0;
    // Sums up to 12 and up to 16 share the same threshold.
    if (size_sum <= 16) return mag >= 40 ? 1 : 0;
    if (size_sum <= 24) {
      if (mag >= 32) return 3;
      if (mag >= 16) return 2;
      return mag >= 8 ? 1 : 0;
    }
    if (size_sum <= 32) {
      if (mag >= 32) return 3;
      if (mag >= 4) return 2;
      return mag >= 1 ? 1 : 0;
    }
    return mag >= 1 ? 3 : 0;
  }

  if (size_sum <= 8) {
    if (mag >= 64) return 2;
    return mag >= 40 ? 1 : 0;
  }
  if (size_sum <= 16) {
    if (mag >= 48) return 2;
    return mag >= 20 ? 1 : 0;
  }
  if (size_sum <= 24) return mag >= 4 ? 3 : 0;
  return mag >= 1 ? 3 : 0;
}
1020 
av1_filter_intra_edge_c(uint8_t * p,int sz,int strength)1021 void av1_filter_intra_edge_c(uint8_t *p, int sz, int strength) {
1022   if (!strength) return;
1023 
1024   const int kernel[INTRA_EDGE_FILT][INTRA_EDGE_TAPS] = { { 0, 4, 8, 4, 0 },
1025                                                          { 0, 5, 6, 5, 0 },
1026                                                          { 2, 4, 4, 4, 2 } };
1027   const int filt = strength - 1;
1028   uint8_t edge[129];
1029 
1030   memcpy(edge, p, sz * sizeof(*p));
1031   for (int i = 1; i < sz; i++) {
1032     int s = 0;
1033     for (int j = 0; j < INTRA_EDGE_TAPS; j++) {
1034       int k = i - 2 + j;
1035       k = (k < 0) ? 0 : k;
1036       k = (k > sz - 1) ? sz - 1 : k;
1037       s += edge[k] * kernel[filt][j];
1038     }
1039     s = (s + 8) >> 4;
1040     p[i] = s;
1041   }
1042 }
1043 
// Smooths the corner sample shared by the above and left edges (8-bit).
// The new value is (5 * left[0] + 6 * above[-1] + 5 * above[0] + 8) >> 4 and
// is stored to both p_above[-1] and p_left[-1].
static void filter_intra_edge_corner(uint8_t *p_above, uint8_t *p_left) {
  const int side_weight = 5;
  const int corner_weight = 6;

  const int weighted = side_weight * p_left[0] + corner_weight * p_above[-1] +
                       side_weight * p_above[0];
  const int corner = (weighted + 8) >> 4;

  p_above[-1] = corner;
  p_left[-1] = corner;
}
1053 
av1_filter_intra_edge_high_c(uint16_t * p,int sz,int strength)1054 void av1_filter_intra_edge_high_c(uint16_t *p, int sz, int strength) {
1055   if (!strength) return;
1056 
1057   const int kernel[INTRA_EDGE_FILT][INTRA_EDGE_TAPS] = { { 0, 4, 8, 4, 0 },
1058                                                          { 0, 5, 6, 5, 0 },
1059                                                          { 2, 4, 4, 4, 2 } };
1060   const int filt = strength - 1;
1061   uint16_t edge[129];
1062 
1063   memcpy(edge, p, sz * sizeof(*p));
1064   for (int i = 1; i < sz; i++) {
1065     int s = 0;
1066     for (int j = 0; j < INTRA_EDGE_TAPS; j++) {
1067       int k = i - 2 + j;
1068       k = (k < 0) ? 0 : k;
1069       k = (k > sz - 1) ? sz - 1 : k;
1070       s += edge[k] * kernel[filt][j];
1071     }
1072     s = (s + 8) >> 4;
1073     p[i] = s;
1074   }
1075 }
1076 
1077 #if CONFIG_AV1_HIGHBITDEPTH
// Smooths the corner sample shared by the above and left edges
// (high bitdepth). The new value is
// (5 * left[0] + 6 * above[-1] + 5 * above[0] + 8) >> 4 and is stored to both
// p_above[-1] and p_left[-1].
static void filter_intra_edge_corner_high(uint16_t *p_above, uint16_t *p_left) {
  const int side_weight = 5;
  const int corner_weight = 6;

  const int weighted = side_weight * p_left[0] + corner_weight * p_above[-1] +
                       side_weight * p_above[0];
  const int corner = (weighted + 8) >> 4;

  p_above[-1] = corner;
  p_left[-1] = corner;
}
1087 #endif
1088 
av1_upsample_intra_edge_c(uint8_t * p,int sz)1089 void av1_upsample_intra_edge_c(uint8_t *p, int sz) {
1090   // interpolate half-sample positions
1091   assert(sz <= MAX_UPSAMPLE_SZ);
1092 
1093   uint8_t in[MAX_UPSAMPLE_SZ + 3];
1094   // copy p[-1..(sz-1)] and extend first and last samples
1095   in[0] = p[-1];
1096   in[1] = p[-1];
1097   for (int i = 0; i < sz; i++) {
1098     in[i + 2] = p[i];
1099   }
1100   in[sz + 2] = p[sz - 1];
1101 
1102   // interpolate half-sample edge positions
1103   p[-2] = in[0];
1104   for (int i = 0; i < sz; i++) {
1105     int s = -in[i] + (9 * in[i + 1]) + (9 * in[i + 2]) - in[i + 3];
1106     s = clip_pixel((s + 8) >> 4);
1107     p[2 * i - 1] = s;
1108     p[2 * i] = in[i + 2];
1109   }
1110 }
1111 
av1_upsample_intra_edge_high_c(uint16_t * p,int sz,int bd)1112 void av1_upsample_intra_edge_high_c(uint16_t *p, int sz, int bd) {
1113   // interpolate half-sample positions
1114   assert(sz <= MAX_UPSAMPLE_SZ);
1115 
1116   uint16_t in[MAX_UPSAMPLE_SZ + 3];
1117   // copy p[-1..(sz-1)] and extend first and last samples
1118   in[0] = p[-1];
1119   in[1] = p[-1];
1120   for (int i = 0; i < sz; i++) {
1121     in[i + 2] = p[i];
1122   }
1123   in[sz + 2] = p[sz - 1];
1124 
1125   // interpolate half-sample edge positions
1126   p[-2] = in[0];
1127   for (int i = 0; i < sz; i++) {
1128     int s = -in[i] + (9 * in[i + 1]) + (9 * in[i + 2]) - in[i + 3];
1129     s = (s + 8) >> 4;
1130     s = clip_pixel_highbd(s, bd);
1131     p[2 * i - 1] = s;
1132     p[2 * i] = in[i + 2];
1133   }
1134 }
1135 #if CONFIG_AV1_HIGHBITDEPTH
// Builds the high-bitdepth intra prediction of one transform block.
//
// The above and left reference edges are gathered from the samples around
// ref8 into local, padded buffers (falling back to substitute values when a
// neighbor is unavailable) and are then handed to the filter-intra,
// directional, DC or generic predictor selected by mode / filter_intra_mode.
//
//  xd                   supplies the bit depth (xd->bd) and, through
//                       get_filt_type(), the neighboring blocks' mode info.
//  ref8, ref_stride     reference samples at the block's top-left position;
//                       ref8 is converted with CONVERT_TO_SHORTPTR and the
//                       stride is applied to the resulting uint16_t pointer.
//  dst8, dst_stride     destination, converted the same way.
//  mode, angle_delta    prediction mode; for directional modes the angle is
//                       mode_to_angle_map[mode] + angle_delta.
//  filter_intra_mode    FILTER_INTRA_MODES means filter-intra is not used.
//  tx_size              selects the block width / height.
//  disable_edge_filter  non-zero skips edge filtering and edge upsampling
//                       for directional modes.
//  n_top_px, n_topright_px, n_left_px, n_bottomleft_px
//                       number of samples read from the respective
//                       neighboring edge; 0 means nothing is read from it.
//  plane                forwarded to get_filt_type().
static void build_intra_predictors_high(
    const MACROBLOCKD *xd, const uint8_t *ref8, int ref_stride, uint8_t *dst8,
    int dst_stride, PREDICTION_MODE mode, int angle_delta,
    FILTER_INTRA_MODE filter_intra_mode, TX_SIZE tx_size,
    int disable_edge_filter, int n_top_px, int n_topright_px, int n_left_px,
    int n_bottomleft_px, int plane) {
  int i;
  uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
  uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
  // Edge buffers. The edges start 16 entries into the storage so that
  // negative indices (above-left sample, upsampled edge) stay inside it.
  DECLARE_ALIGNED(16, uint16_t, left_data[MAX_TX_SIZE * 2 + 32]);
  DECLARE_ALIGNED(16, uint16_t, above_data[MAX_TX_SIZE * 2 + 32]);
  uint16_t *const above_row = above_data + 16;
  uint16_t *const left_col = left_data + 16;
  const int txwpx = tx_size_wide[tx_size];
  const int txhpx = tx_size_high[tx_size];
  int need_left = extend_modes[mode] & NEED_LEFT;
  int need_above = extend_modes[mode] & NEED_ABOVE;
  int need_above_left = extend_modes[mode] & NEED_ABOVELEFT;
  // Row directly above and column directly left of the block.
  const uint16_t *above_ref = ref - ref_stride;
  const uint16_t *left_ref = ref - 1;
  int p_angle = 0;
  const int is_dr_mode = av1_is_directional_mode(mode);
  const int use_filter_intra = filter_intra_mode != FILTER_INTRA_MODES;
  // 128 scaled to the bit depth; basis of the substitute edge values.
  int base = 128 << (xd->bd - 8);

  // The default values if ref pixels are not available:
  // base   base-1 base-1 .. base-1 base-1 base-1 base-1 base-1 base-1
  // base+1   A      B  ..     Y      Z
  // base+1   C      D  ..     W      X
  // base+1   E      F  ..     U      V
  // base+1   G      H  ..     S      T      T      T      T      T

  // For directional modes the required edges follow from the final angle
  // rather than from the extend_modes table.
  if (is_dr_mode) {
    p_angle = mode_to_angle_map[mode] + angle_delta;
    if (p_angle <= 90)
      need_above = 1, need_left = 0, need_above_left = 1;
    else if (p_angle < 180)
      need_above = 1, need_left = 1, need_above_left = 1;
    else
      need_above = 0, need_left = 1, need_above_left = 1;
  }
  if (use_filter_intra) need_left = need_above = need_above_left = 1;

  assert(n_top_px >= 0);
  assert(n_topright_px >= 0);
  assert(n_left_px >= 0);
  assert(n_bottomleft_px >= 0);

  // Only one edge is needed and that edge has no samples: the block is filled
  // with a single value, taken from the other edge when that one has samples
  // and from the substitute values otherwise.
  if ((!need_above && n_left_px == 0) || (!need_left && n_top_px == 0)) {
    int val;
    if (need_left) {
      val = (n_top_px > 0) ? above_ref[0] : base + 1;
    } else {
      val = (n_left_px > 0) ? left_ref[0] : base - 1;
    }
    for (i = 0; i < txhpx; ++i) {
      aom_memset16(dst, val, txwpx);
      dst += dst_stride;
    }
    return;
  }

  // NEED_LEFT
  if (need_left) {
    int need_bottom = extend_modes[mode] & NEED_BOTTOMLEFT;
    if (use_filter_intra) need_bottom = 0;
    if (is_dr_mode) need_bottom = p_angle > 180;
    const int num_left_pixels_needed = txhpx + (need_bottom ? txwpx : 0);
    i = 0;
    if (n_left_px > 0) {
      // Copy the left column, then the bottom-left extension when needed.
      for (; i < n_left_px; i++) left_col[i] = left_ref[i * ref_stride];
      if (need_bottom && n_bottomleft_px > 0) {
        assert(i == txhpx);
        for (; i < txhpx + n_bottomleft_px; i++)
          left_col[i] = left_ref[i * ref_stride];
      }
      // Pad the remainder by repeating the last copied sample.
      if (i < num_left_pixels_needed)
        aom_memset16(&left_col[i], left_col[i - 1], num_left_pixels_needed - i);
    } else {
      // No left samples: substitute the first above sample, or base + 1.
      if (n_top_px > 0) {
        aom_memset16(left_col, above_ref[0], num_left_pixels_needed);
      } else {
        aom_memset16(left_col, base + 1, num_left_pixels_needed);
      }
    }
  }

  // NEED_ABOVE
  if (need_above) {
    int need_right = extend_modes[mode] & NEED_ABOVERIGHT;
    if (use_filter_intra) need_right = 0;
    if (is_dr_mode) need_right = p_angle < 90;
    const int num_top_pixels_needed = txwpx + (need_right ? txhpx : 0);
    if (n_top_px > 0) {
      // Copy the above row, then the above-right extension when needed.
      memcpy(above_row, above_ref, n_top_px * sizeof(above_ref[0]));
      i = n_top_px;
      if (need_right && n_topright_px > 0) {
        assert(n_top_px == txwpx);
        memcpy(above_row + txwpx, above_ref + txwpx,
               n_topright_px * sizeof(above_ref[0]));
        i += n_topright_px;
      }
      // Pad the remainder by repeating the last copied sample.
      if (i < num_top_pixels_needed)
        aom_memset16(&above_row[i], above_row[i - 1],
                     num_top_pixels_needed - i);
    } else {
      // No above samples: substitute the first left sample, or base - 1.
      if (n_left_px > 0) {
        aom_memset16(above_row, left_ref[0], num_top_pixels_needed);
      } else {
        aom_memset16(above_row, base - 1, num_top_pixels_needed);
      }
    }
  }

  // Above-left corner sample, stored at index -1 of both edge buffers.
  if (need_above_left) {
    if (n_top_px > 0 && n_left_px > 0) {
      above_row[-1] = above_ref[-1];
    } else if (n_top_px > 0) {
      above_row[-1] = above_ref[0];
    } else if (n_left_px > 0) {
      above_row[-1] = left_ref[0];
    } else {
      above_row[-1] = base;
    }
    left_col[-1] = above_row[-1];
  }

  if (use_filter_intra) {
    highbd_filter_intra_predictor(dst, dst_stride, tx_size, above_row, left_col,
                                  filter_intra_mode, xd->bd);
    return;
  }

  if (is_dr_mode) {
    int upsample_above = 0;
    int upsample_left = 0;
    if (!disable_edge_filter) {
      const int need_right = p_angle < 90;
      const int need_bottom = p_angle > 180;
      const int filt_type = get_filt_type(xd, plane);
      // Edge smoothing is skipped for exactly vertical / horizontal angles.
      if (p_angle != 90 && p_angle != 180) {
        // 1 when the corner sample is part of the filtered edge.
        const int ab_le = need_above_left ? 1 : 0;
        if (need_above && need_left && (txwpx + txhpx >= 24)) {
          filter_intra_edge_corner_high(above_row, left_col);
        }
        if (need_above && n_top_px > 0) {
          const int strength =
              intra_edge_filter_strength(txwpx, txhpx, p_angle - 90, filt_type);
          const int n_px = n_top_px + ab_le + (need_right ? txhpx : 0);
          av1_filter_intra_edge_high(above_row - ab_le, n_px, strength);
        }
        if (need_left && n_left_px > 0) {
          const int strength = intra_edge_filter_strength(
              txhpx, txwpx, p_angle - 180, filt_type);
          const int n_px = n_left_px + ab_le + (need_bottom ? txwpx : 0);
          av1_filter_intra_edge_high(left_col - ab_le, n_px, strength);
        }
      }
      // Optional 2x upsampling of each edge; the flags are passed on to the
      // directional predictor.
      upsample_above =
          av1_use_intra_edge_upsample(txwpx, txhpx, p_angle - 90, filt_type);
      if (need_above && upsample_above) {
        const int n_px = txwpx + (need_right ? txhpx : 0);
        av1_upsample_intra_edge_high(above_row, n_px, xd->bd);
      }
      upsample_left =
          av1_use_intra_edge_upsample(txhpx, txwpx, p_angle - 180, filt_type);
      if (need_left && upsample_left) {
        const int n_px = txhpx + (need_bottom ? txwpx : 0);
        av1_upsample_intra_edge_high(left_col, n_px, xd->bd);
      }
    }
    highbd_dr_predictor(dst, dst_stride, tx_size, above_row, left_col,
                        upsample_above, upsample_left, p_angle, xd->bd);
    return;
  }

  // predict
  if (mode == DC_PRED) {
    // The DC variant is selected by which of the two edges have samples.
    dc_pred_high[n_left_px > 0][n_top_px > 0][tx_size](
        dst, dst_stride, above_row, left_col, xd->bd);
  } else {
    pred_high[mode][tx_size](dst, dst_stride, above_row, left_col, xd->bd);
  }
}
1320 #endif  // CONFIG_AV1_HIGHBITDEPTH
1321 
// Builds the 8-bit intra prediction of one transform block.
//
// The above and left reference edges are gathered from the samples around
// ref into local, padded buffers (falling back to substitute values when a
// neighbor is unavailable) and are then handed to the filter-intra,
// directional, DC or generic predictor selected by mode / filter_intra_mode.
//
//  xd                   supplies, through get_filt_type(), the neighboring
//                       blocks' mode info.
//  ref, ref_stride      reference samples at the block's top-left position.
//  dst, dst_stride      destination of the prediction.
//  mode, angle_delta    prediction mode; for directional modes the angle is
//                       mode_to_angle_map[mode] + angle_delta.
//  filter_intra_mode    FILTER_INTRA_MODES means filter-intra is not used.
//  tx_size              selects the block width / height.
//  disable_edge_filter  non-zero skips edge filtering and edge upsampling
//                       for directional modes.
//  n_top_px, n_topright_px, n_left_px, n_bottomleft_px
//                       number of samples read from the respective
//                       neighboring edge; 0 means nothing is read from it.
//  plane                forwarded to get_filt_type().
static void build_intra_predictors(const MACROBLOCKD *xd, const uint8_t *ref,
                                   int ref_stride, uint8_t *dst, int dst_stride,
                                   PREDICTION_MODE mode, int angle_delta,
                                   FILTER_INTRA_MODE filter_intra_mode,
                                   TX_SIZE tx_size, int disable_edge_filter,
                                   int n_top_px, int n_topright_px,
                                   int n_left_px, int n_bottomleft_px,
                                   int plane) {
  int i;
  // Row directly above and column directly left of the block.
  const uint8_t *above_ref = ref - ref_stride;
  const uint8_t *left_ref = ref - 1;
  // Edge buffers. The edges start 16 entries into the storage so that
  // negative indices (above-left sample, upsampled edge) stay inside it.
  DECLARE_ALIGNED(16, uint8_t, left_data[MAX_TX_SIZE * 2 + 32]);
  DECLARE_ALIGNED(16, uint8_t, above_data[MAX_TX_SIZE * 2 + 32]);
  uint8_t *const above_row = above_data + 16;
  uint8_t *const left_col = left_data + 16;
  const int txwpx = tx_size_wide[tx_size];
  const int txhpx = tx_size_high[tx_size];
  int need_left = extend_modes[mode] & NEED_LEFT;
  int need_above = extend_modes[mode] & NEED_ABOVE;
  int need_above_left = extend_modes[mode] & NEED_ABOVELEFT;
  int p_angle = 0;
  const int is_dr_mode = av1_is_directional_mode(mode);
  const int use_filter_intra = filter_intra_mode != FILTER_INTRA_MODES;

  // The default values if ref pixels are not available:
  // 128 127 127 .. 127 127 127 127 127 127
  // 129  A   B  ..  Y   Z
  // 129  C   D  ..  W   X
  // 129  E   F  ..  U   V
  // 129  G   H  ..  S   T   T   T   T   T
  // ..

  // For directional modes the required edges follow from the final angle
  // rather than from the extend_modes table.
  if (is_dr_mode) {
    p_angle = mode_to_angle_map[mode] + angle_delta;
    if (p_angle <= 90)
      need_above = 1, need_left = 0, need_above_left = 1;
    else if (p_angle < 180)
      need_above = 1, need_left = 1, need_above_left = 1;
    else
      need_above = 0, need_left = 1, need_above_left = 1;
  }
  if (use_filter_intra) need_left = need_above = need_above_left = 1;

  assert(n_top_px >= 0);
  assert(n_topright_px >= 0);
  assert(n_left_px >= 0);
  assert(n_bottomleft_px >= 0);

  // Only one edge is needed and that edge has no samples: the block is filled
  // with a single value, taken from the other edge when that one has samples
  // and from the substitute values otherwise.
  if ((!need_above && n_left_px == 0) || (!need_left && n_top_px == 0)) {
    int val;
    if (need_left) {
      val = (n_top_px > 0) ? above_ref[0] : 129;
    } else {
      val = (n_left_px > 0) ? left_ref[0] : 127;
    }
    for (i = 0; i < txhpx; ++i) {
      memset(dst, val, txwpx);
      dst += dst_stride;
    }
    return;
  }

  // NEED_LEFT
  if (need_left) {
    int need_bottom = extend_modes[mode] & NEED_BOTTOMLEFT;
    if (use_filter_intra) need_bottom = 0;
    if (is_dr_mode) need_bottom = p_angle > 180;
    // the avx2 dr_prediction_z2 may read at most 3 extra bytes,
    // due to the avx2 mask load is with dword granularity.
    // so we initialize 3 extra bytes to silence valgrind complain.
    const int num_left_pixels_needed = txhpx + (need_bottom ? txwpx : 3);
    i = 0;
    if (n_left_px > 0) {
      // Copy the left column, then the bottom-left extension when needed.
      for (; i < n_left_px; i++) left_col[i] = left_ref[i * ref_stride];
      if (need_bottom && n_bottomleft_px > 0) {
        assert(i == txhpx);
        for (; i < txhpx + n_bottomleft_px; i++)
          left_col[i] = left_ref[i * ref_stride];
      }
      // Pad the remainder by repeating the last copied sample.
      if (i < num_left_pixels_needed)
        memset(&left_col[i], left_col[i - 1], num_left_pixels_needed - i);
    } else {
      // No left samples: substitute the first above sample, or 129.
      if (n_top_px > 0) {
        memset(left_col, above_ref[0], num_left_pixels_needed);
      } else {
        memset(left_col, 129, num_left_pixels_needed);
      }
    }
  }

  // NEED_ABOVE
  if (need_above) {
    int need_right = extend_modes[mode] & NEED_ABOVERIGHT;
    if (use_filter_intra) need_right = 0;
    if (is_dr_mode) need_right = p_angle < 90;
    const int num_top_pixels_needed = txwpx + (need_right ? txhpx : 0);
    if (n_top_px > 0) {
      // Copy the above row, then the above-right extension when needed.
      memcpy(above_row, above_ref, n_top_px);
      i = n_top_px;
      if (need_right && n_topright_px > 0) {
        assert(n_top_px == txwpx);
        memcpy(above_row + txwpx, above_ref + txwpx, n_topright_px);
        i += n_topright_px;
      }
      // Pad the remainder by repeating the last copied sample.
      if (i < num_top_pixels_needed)
        memset(&above_row[i], above_row[i - 1], num_top_pixels_needed - i);
    } else {
      // No above samples: substitute the first left sample, or 127.
      if (n_left_px > 0) {
        memset(above_row, left_ref[0], num_top_pixels_needed);
      } else {
        memset(above_row, 127, num_top_pixels_needed);
      }
    }
  }

  // Above-left corner sample, stored at index -1 of both edge buffers.
  if (need_above_left) {
    if (n_top_px > 0 && n_left_px > 0) {
      above_row[-1] = above_ref[-1];
    } else if (n_top_px > 0) {
      above_row[-1] = above_ref[0];
    } else if (n_left_px > 0) {
      above_row[-1] = left_ref[0];
    } else {
      above_row[-1] = 128;
    }
    left_col[-1] = above_row[-1];
  }

  if (use_filter_intra) {
    av1_filter_intra_predictor(dst, dst_stride, tx_size, above_row, left_col,
                               filter_intra_mode);
    return;
  }

  if (is_dr_mode) {
    int upsample_above = 0;
    int upsample_left = 0;
    if (!disable_edge_filter) {
      const int need_right = p_angle < 90;
      const int need_bottom = p_angle > 180;
      const int filt_type = get_filt_type(xd, plane);
      // Edge smoothing is skipped for exactly vertical / horizontal angles.
      if (p_angle != 90 && p_angle != 180) {
        // 1 when the corner sample is part of the filtered edge.
        const int ab_le = need_above_left ? 1 : 0;
        if (need_above && need_left && (txwpx + txhpx >= 24)) {
          filter_intra_edge_corner(above_row, left_col);
        }
        if (need_above && n_top_px > 0) {
          const int strength =
              intra_edge_filter_strength(txwpx, txhpx, p_angle - 90, filt_type);
          const int n_px = n_top_px + ab_le + (need_right ? txhpx : 0);
          av1_filter_intra_edge(above_row - ab_le, n_px, strength);
        }
        if (need_left && n_left_px > 0) {
          const int strength = intra_edge_filter_strength(
              txhpx, txwpx, p_angle - 180, filt_type);
          const int n_px = n_left_px + ab_le + (need_bottom ? txwpx : 0);
          av1_filter_intra_edge(left_col - ab_le, n_px, strength);
        }
      }
      // Optional 2x upsampling of each edge; the flags are passed on to the
      // directional predictor.
      upsample_above =
          av1_use_intra_edge_upsample(txwpx, txhpx, p_angle - 90, filt_type);
      if (need_above && upsample_above) {
        const int n_px = txwpx + (need_right ? txhpx : 0);
        av1_upsample_intra_edge(above_row, n_px);
      }
      upsample_left =
          av1_use_intra_edge_upsample(txhpx, txwpx, p_angle - 180, filt_type);
      if (need_left && upsample_left) {
        const int n_px = txhpx + (need_bottom ? txwpx : 0);
        av1_upsample_intra_edge(left_col, n_px);
      }
    }
    dr_predictor(dst, dst_stride, tx_size, above_row, left_col, upsample_above,
                 upsample_left, p_angle);
    return;
  }

  // predict
  if (mode == DC_PRED) {
    // The DC variant is selected by which of the two edges have samples.
    dc_pred[n_left_px > 0][n_top_px > 0][tx_size](dst, dst_stride, above_row,
                                                  left_col);
  } else {
    pred[mode][tx_size](dst, dst_stride, above_row, left_col);
  }
}
1507 
scale_chroma_bsize(BLOCK_SIZE bsize,int subsampling_x,int subsampling_y)1508 static INLINE BLOCK_SIZE scale_chroma_bsize(BLOCK_SIZE bsize, int subsampling_x,
1509                                             int subsampling_y) {
1510   assert(subsampling_x >= 0 && subsampling_x < 2);
1511   assert(subsampling_y >= 0 && subsampling_y < 2);
1512   BLOCK_SIZE bs = bsize;
1513   switch (bsize) {
1514     case BLOCK_4X4:
1515       if (subsampling_x == 1 && subsampling_y == 1)
1516         bs = BLOCK_8X8;
1517       else if (subsampling_x == 1)
1518         bs = BLOCK_8X4;
1519       else if (subsampling_y == 1)
1520         bs = BLOCK_4X8;
1521       break;
1522     case BLOCK_4X8:
1523       if (subsampling_x == 1 && subsampling_y == 1)
1524         bs = BLOCK_8X8;
1525       else if (subsampling_x == 1)
1526         bs = BLOCK_8X8;
1527       else if (subsampling_y == 1)
1528         bs = BLOCK_4X8;
1529       break;
1530     case BLOCK_8X4:
1531       if (subsampling_x == 1 && subsampling_y == 1)
1532         bs = BLOCK_8X8;
1533       else if (subsampling_x == 1)
1534         bs = BLOCK_8X4;
1535       else if (subsampling_y == 1)
1536         bs = BLOCK_8X8;
1537       break;
1538     case BLOCK_4X16:
1539       if (subsampling_x == 1 && subsampling_y == 1)
1540         bs = BLOCK_8X16;
1541       else if (subsampling_x == 1)
1542         bs = BLOCK_8X16;
1543       else if (subsampling_y == 1)
1544         bs = BLOCK_4X16;
1545       break;
1546     case BLOCK_16X4:
1547       if (subsampling_x == 1 && subsampling_y == 1)
1548         bs = BLOCK_16X8;
1549       else if (subsampling_x == 1)
1550         bs = BLOCK_16X4;
1551       else if (subsampling_y == 1)
1552         bs = BLOCK_16X8;
1553       break;
1554     default: break;
1555   }
1556   return bs;
1557 }
1558 
// Writes the intra prediction for one transform block of 'plane' into 'dst'.
//
// Palette path (use_palette != 0): every output sample is fetched from the
// block's palette using the plane's color index map; no neighboring pixels
// are consulted and the function returns right after filling 'dst'.
//
// Regular path: determines which neighboring edges exist (top, left,
// top-right, bottom-left), clamps the number of usable edge pixels against
// the distance to the frame's right/bottom edge, and forwards everything to
// build_intra_predictors(), or to build_intra_predictors_high() when
// CONFIG_AV1_HIGHBITDEPTH is enabled and is_cur_buf_hbd(xd) is true.
//
// wpx, hpx          - plane dimensions in pixels; wpx also serves as the row
//                     stride of the color index map.
// col_off, row_off  - position of the transform block inside the current
//                     block, in MI units (scaled by MI_SIZE_LOG2 to pixels).
// ref, ref_stride   - reference buffer handed on to the predictor builders.
// dst, dst_stride   - destination buffer for the prediction.
void av1_predict_intra_block(
    const AV1_COMMON *cm, const MACROBLOCKD *xd, int wpx, int hpx,
    TX_SIZE tx_size, PREDICTION_MODE mode, int angle_delta, int use_palette,
    FILTER_INTRA_MODE filter_intra_mode, const uint8_t *ref, int ref_stride,
    uint8_t *dst, int dst_stride, int col_off, int row_off, int plane) {
  const MB_MODE_INFO *const mbmi = xd->mi[0];
  const int txwpx = tx_size_wide[tx_size];
  const int txhpx = tx_size_high[tx_size];
  // Pixel offset of this transform block, converted from MI units.
  const int x = col_off << MI_SIZE_LOG2;
  const int y = row_off << MI_SIZE_LOG2;

  if (use_palette) {
    // The color index map is kept per "luma / chroma" slot, while the palette
    // colors are stored per plane.
    const int map_slot = (plane != 0);
    const uint8_t *const map = xd->plane[map_slot].color_index_map +
                               xd->color_index_map_offset[map_slot];
    const uint16_t *const palette =
        mbmi->palette_mode_info.palette_colors + plane * PALETTE_MAX_SIZE;
    if (is_cur_buf_hbd(xd)) {
      uint16_t *const dst16 = CONVERT_TO_SHORTPTR(dst);
      for (int row = 0; row < txhpx; ++row) {
        const int map_base = (row + y) * wpx + x;
        const int dst_base = row * dst_stride;
        for (int col = 0; col < txwpx; ++col) {
          dst16[dst_base + col] = palette[map[map_base + col]];
        }
      }
    } else {
      for (int row = 0; row < txhpx; ++row) {
        const int map_base = (row + y) * wpx + x;
        const int dst_base = row * dst_stride;
        for (int col = 0; col < txwpx; ++col) {
          // Palette entries are 16 bits wide; narrow them for 8-bit output.
          dst[dst_base + col] = (uint8_t)palette[map[map_base + col]];
        }
      }
    }
    return;
  }

  const struct macroblockd_plane *const pd = &xd->plane[plane];
  const int ss_x = pd->subsampling_x;
  const int ss_y = pd->subsampling_y;
  const int txw = tx_size_wide_unit[tx_size];
  const int txh = tx_size_high_unit[tx_size];

  // A transform block that is not in the first row/column of the current
  // block always has a neighbor inside that block; otherwise fall back to the
  // availability flags stored in xd (chroma variants for subsampled axes).
  const int top_nbr_avail = ss_y ? xd->chroma_up_available : xd->up_available;
  const int left_nbr_avail =
      ss_x ? xd->chroma_left_available : xd->left_available;
  const int have_top = (row_off != 0) || top_nbr_avail;
  const int have_left = (col_off != 0) || left_nbr_avail;

  // NOTE(review): the extra shift by 3 suggests the mb_to_*_edge fields are
  // stored in 1/8-pixel units -- confirm against their declaration.
  const int mi_row = (-xd->mb_to_top_edge) >> (3 + MI_SIZE_LOG2);
  const int mi_col = (-xd->mb_to_left_edge) >> (3 + MI_SIZE_LOG2);

  // Distance from the right edge of this prediction block to the right edge
  // of the frame.
  const int xr = (xd->mb_to_right_edge >> (3 + ss_x)) + (wpx - x - txwpx);
  // Distance from the bottom edge of this prediction block to the bottom edge
  // of the frame.
  const int yd = (xd->mb_to_bottom_edge >> (3 + ss_y)) + (hpx - y - txhpx);

  // The column right of / row below this transform block must still lie
  // inside the current tile (and, for the bottom, inside the frame).
  const int right_available =
      mi_col + ((col_off + txw) << ss_x) < xd->tile.mi_col_end;
  const int bottom_available =
      (yd > 0) && (mi_row + ((row_off + txh) << ss_y) < xd->tile.mi_row_end);

  const PARTITION_TYPE partition = mbmi->partition;

  // Force 4x4 chroma component block size on subsampled planes.
  const BLOCK_SIZE bsize =
      (ss_x || ss_y) ? scale_chroma_bsize(mbmi->sb_type, ss_x, ss_y)
                     : mbmi->sb_type;

  const int have_top_right =
      has_top_right(cm, bsize, mi_row, mi_col, have_top, right_available,
                    partition, tx_size, row_off, col_off, ss_x, ss_y);
  const int have_bottom_left =
      has_bottom_left(cm, bsize, mi_row, mi_col, bottom_available, have_left,
                      partition, tx_size, row_off, col_off, ss_x, ss_y);

  // Edge pixel counts handed to the predictor builders: zero when the edge
  // is missing, otherwise capped at the transform dimension and at what
  // remains before the frame boundary.
  const int top_px = have_top ? AOMMIN(txwpx, xr + txwpx) : 0;
  const int top_right_px = have_top_right ? AOMMIN(txwpx, xr) : 0;
  const int left_px = have_left ? AOMMIN(txhpx, yd + txhpx) : 0;
  const int bottom_left_px = have_bottom_left ? AOMMIN(txhpx, yd) : 0;

  const int disable_edge_filter = !cm->seq_params.enable_intra_edge_filter;
#if CONFIG_AV1_HIGHBITDEPTH
  if (is_cur_buf_hbd(xd)) {
    build_intra_predictors_high(xd, ref, ref_stride, dst, dst_stride, mode,
                                angle_delta, filter_intra_mode, tx_size,
                                disable_edge_filter, top_px, top_right_px,
                                left_px, bottom_left_px, plane);
    return;
  }
#endif
  build_intra_predictors(xd, ref, ref_stride, dst, dst_stride, mode,
                         angle_delta, filter_intra_mode, tx_size,
                         disable_edge_filter, top_px, top_right_px, left_px,
                         bottom_left_px, plane);
}
1657 
// Convenience wrapper around av1_predict_intra_block() that pulls the
// prediction parameters (mode, angle delta, palette use, filter-intra mode,
// destination pointer) out of the current block's mode info and the plane's
// destination buffer. The prediction is made in place: the destination
// buffer is passed as both the reference and the output.
//
// For chroma planes coded with UV_CFL_PRED the base prediction is either
// computed and optionally stored in the CfL cache, or reloaded from that
// cache, and cfl_predict_block() is then applied on top of it.
//
// blk_col, blk_row - position of the transform block inside the current
//                    block, in MI units.
void av1_predict_intra_block_facade(const AV1_COMMON *cm, MACROBLOCKD *xd,
                                    int plane, int blk_col, int blk_row,
                                    TX_SIZE tx_size) {
  const MB_MODE_INFO *const mbmi = xd->mi[0];
  struct macroblockd_plane *const pd = &xd->plane[plane];
  const int is_luma = (plane == AOM_PLANE_Y);
  const int dst_stride = pd->dst.stride;
  // Block offsets are in MI units; scale the combined offset to pixels.
  uint8_t *const dst =
      pd->dst.buf + ((blk_row * dst_stride + blk_col) << MI_SIZE_LOG2);
  const PREDICTION_MODE mode =
      is_luma ? mbmi->mode : get_uv_mode(mbmi->uv_mode);
  const int use_palette = mbmi->palette_mode_info.palette_size[plane != 0] > 0;
  const int angle_delta = mbmi->angle_delta[!is_luma] * ANGLE_STEP;

  // Filter-intra only applies to luma; FILTER_INTRA_MODES is passed whenever
  // it is not in use.
  FILTER_INTRA_MODE filter_intra_mode = FILTER_INTRA_MODES;
  if (is_luma && mbmi->filter_intra_mode_info.use_filter_intra) {
    filter_intra_mode = mbmi->filter_intra_mode_info.filter_intra_mode;
  }

  // Everything except chroma-from-luma is a plain prediction call.
  if (is_luma || mbmi->uv_mode != UV_CFL_PRED) {
    av1_predict_intra_block(cm, xd, pd->width, pd->height, tx_size, mode,
                            angle_delta, use_palette, filter_intra_mode, dst,
                            dst_stride, dst, dst_stride, blk_col, blk_row,
                            plane);
    return;
  }

#if CONFIG_DEBUG
  assert(is_cfl_allowed(xd));
  const BLOCK_SIZE plane_bsize = get_plane_block_size(
      mbmi->sb_type, pd->subsampling_x, pd->subsampling_y);
  (void)plane_bsize;
  assert(plane_bsize < BLOCK_SIZES_ALL);
  if (!xd->lossless[mbmi->segment_id]) {
    // Outside lossless mode a CfL block is expected to be covered by a single
    // transform block of the same size.
    assert(blk_col == 0);
    assert(blk_row == 0);
    assert(block_size_wide[plane_bsize] == tx_size_wide[tx_size]);
    assert(block_size_high[plane_bsize] == tx_size_high[tx_size]);
  }
#endif

  CFL_CTX *const cfl = &xd->cfl;
  const CFL_PRED_TYPE pred_plane = get_cfl_pred_type(plane);
  if (cfl->dc_pred_is_cached[pred_plane]) {
    // Reuse the base prediction stored by an earlier call.
    cfl_load_dc_pred(xd, dst, dst_stride, tx_size, pred_plane);
  } else {
    av1_predict_intra_block(cm, xd, pd->width, pd->height, tx_size, mode,
                            angle_delta, use_palette, filter_intra_mode, dst,
                            dst_stride, dst, dst_stride, blk_col, blk_row,
                            plane);
    if (cfl->use_dc_pred_cache) {
      cfl_store_dc_pred(xd, dst, pred_plane, tx_size_wide[tx_size]);
      cfl->dc_pred_is_cached[pred_plane] = 1;
    }
  }
  cfl_predict_block(xd, dst, dst_stride, tx_size, plane);
}
1709 
av1_init_intra_predictors(void)1710 void av1_init_intra_predictors(void) {
1711   aom_once(init_intra_predictors_internal);
1712 }
1713