1 /*
2  *  Copyright (c) 2015 The WebM project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include "./vpx_config.h"
12 #include "./vpx_dsp_rtcd.h"
13 
14 #include "vpx_dsp/vpx_dsp_common.h"
15 #include "vpx_mem/vpx_mem.h"
16 
// Pixel at column (x) of row (y). Expands to an lvalue and relies on `dst` and
// `stride` being in scope wherever it is used.
#define DST(x, y) dst[(y)*stride + (x)]
// Rounded 3-tap average with weights 1, 2, 1.
#define AVG3(a, b, c) (((a) + (b)*2 + (c) + 2) >> 2)
// Rounded average of two values.
#define AVG2(a, b) ((1 + (a) + (b)) >> 1)
20 
d207_predictor(uint8_t * dst,ptrdiff_t stride,int bs,const uint8_t * above,const uint8_t * left)21 static INLINE void d207_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
22                                   const uint8_t *above, const uint8_t *left) {
23   int r, c;
24   (void)above;
25   // first column
26   for (r = 0; r < bs - 1; ++r) dst[r * stride] = AVG2(left[r], left[r + 1]);
27   dst[(bs - 1) * stride] = left[bs - 1];
28   dst++;
29 
30   // second column
31   for (r = 0; r < bs - 2; ++r)
32     dst[r * stride] = AVG3(left[r], left[r + 1], left[r + 2]);
33   dst[(bs - 2) * stride] = AVG3(left[bs - 2], left[bs - 1], left[bs - 1]);
34   dst[(bs - 1) * stride] = left[bs - 1];
35   dst++;
36 
37   // rest of last row
38   for (c = 0; c < bs - 2; ++c) dst[(bs - 1) * stride + c] = left[bs - 1];
39 
40   for (r = bs - 2; r >= 0; --r)
41     for (c = 0; c < bs - 2; ++c)
42       dst[r * stride + c] = dst[(r + 1) * stride + c - 2];
43 }
44 
d63_predictor(uint8_t * dst,ptrdiff_t stride,int bs,const uint8_t * above,const uint8_t * left)45 static INLINE void d63_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
46                                  const uint8_t *above, const uint8_t *left) {
47   int r, c;
48   int size;
49   (void)left;
50   for (c = 0; c < bs; ++c) {
51     dst[c] = AVG2(above[c], above[c + 1]);
52     dst[stride + c] = AVG3(above[c], above[c + 1], above[c + 2]);
53   }
54   for (r = 2, size = bs - 2; r < bs; r += 2, --size) {
55     memcpy(dst + (r + 0) * stride, dst + (r >> 1), size);
56     memset(dst + (r + 0) * stride + size, above[bs - 1], bs - size);
57     memcpy(dst + (r + 1) * stride, dst + stride + (r >> 1), size);
58     memset(dst + (r + 1) * stride + size, above[bs - 1], bs - size);
59   }
60 }
61 
d45_predictor(uint8_t * dst,ptrdiff_t stride,int bs,const uint8_t * above,const uint8_t * left)62 static INLINE void d45_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
63                                  const uint8_t *above, const uint8_t *left) {
64   const uint8_t above_right = above[bs - 1];
65   const uint8_t *const dst_row0 = dst;
66   int x, size;
67   (void)left;
68 
69   for (x = 0; x < bs - 1; ++x) {
70     dst[x] = AVG3(above[x], above[x + 1], above[x + 2]);
71   }
72   dst[bs - 1] = above_right;
73   dst += stride;
74   for (x = 1, size = bs - 2; x < bs; ++x, --size) {
75     memcpy(dst, dst_row0 + x, size);
76     memset(dst + size, above_right, x + 1);
77     dst += stride;
78   }
79 }
80 
d117_predictor(uint8_t * dst,ptrdiff_t stride,int bs,const uint8_t * above,const uint8_t * left)81 static INLINE void d117_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
82                                   const uint8_t *above, const uint8_t *left) {
83   int r, c;
84 
85   // first row
86   for (c = 0; c < bs; c++) dst[c] = AVG2(above[c - 1], above[c]);
87   dst += stride;
88 
89   // second row
90   dst[0] = AVG3(left[0], above[-1], above[0]);
91   for (c = 1; c < bs; c++) dst[c] = AVG3(above[c - 2], above[c - 1], above[c]);
92   dst += stride;
93 
94   // the rest of first col
95   dst[0] = AVG3(above[-1], left[0], left[1]);
96   for (r = 3; r < bs; ++r)
97     dst[(r - 2) * stride] = AVG3(left[r - 3], left[r - 2], left[r - 1]);
98 
99   // the rest of the block
100   for (r = 2; r < bs; ++r) {
101     for (c = 1; c < bs; c++) dst[c] = dst[-2 * stride + c - 1];
102     dst += stride;
103   }
104 }
105 
d135_predictor(uint8_t * dst,ptrdiff_t stride,int bs,const uint8_t * above,const uint8_t * left)106 static INLINE void d135_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
107                                   const uint8_t *above, const uint8_t *left) {
108   int i;
109 #if defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ > 7
110   // silence a spurious -Warray-bounds warning, possibly related to:
111   // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=56273
112   uint8_t border[69];
113 #else
114   uint8_t border[32 + 32 - 1];  // outer border from bottom-left to top-right
115 #endif
116 
117   // dst(bs, bs - 2)[0], i.e., border starting at bottom-left
118   for (i = 0; i < bs - 2; ++i) {
119     border[i] = AVG3(left[bs - 3 - i], left[bs - 2 - i], left[bs - 1 - i]);
120   }
121   border[bs - 2] = AVG3(above[-1], left[0], left[1]);
122   border[bs - 1] = AVG3(left[0], above[-1], above[0]);
123   border[bs - 0] = AVG3(above[-1], above[0], above[1]);
124   // dst[0][2, size), i.e., remaining top border ascending
125   for (i = 0; i < bs - 2; ++i) {
126     border[bs + 1 + i] = AVG3(above[i], above[i + 1], above[i + 2]);
127   }
128 
129   for (i = 0; i < bs; ++i) {
130     memcpy(dst + i * stride, border + bs - 1 - i, bs);
131   }
132 }
133 
d153_predictor(uint8_t * dst,ptrdiff_t stride,int bs,const uint8_t * above,const uint8_t * left)134 static INLINE void d153_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
135                                   const uint8_t *above, const uint8_t *left) {
136   int r, c;
137   dst[0] = AVG2(above[-1], left[0]);
138   for (r = 1; r < bs; r++) dst[r * stride] = AVG2(left[r - 1], left[r]);
139   dst++;
140 
141   dst[0] = AVG3(left[0], above[-1], above[0]);
142   dst[stride] = AVG3(above[-1], left[0], left[1]);
143   for (r = 2; r < bs; r++)
144     dst[r * stride] = AVG3(left[r - 2], left[r - 1], left[r]);
145   dst++;
146 
147   for (c = 0; c < bs - 2; c++)
148     dst[c] = AVG3(above[c - 1], above[c], above[c + 1]);
149   dst += stride;
150 
151   for (r = 1; r < bs; ++r) {
152     for (c = 0; c < bs - 2; c++) dst[c] = dst[-stride + c - 2];
153     dst += stride;
154   }
155 }
156 
v_predictor(uint8_t * dst,ptrdiff_t stride,int bs,const uint8_t * above,const uint8_t * left)157 static INLINE void v_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
158                                const uint8_t *above, const uint8_t *left) {
159   int r;
160   (void)left;
161 
162   for (r = 0; r < bs; r++) {
163     memcpy(dst, above, bs);
164     dst += stride;
165   }
166 }
167 
h_predictor(uint8_t * dst,ptrdiff_t stride,int bs,const uint8_t * above,const uint8_t * left)168 static INLINE void h_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
169                                const uint8_t *above, const uint8_t *left) {
170   int r;
171   (void)above;
172 
173   for (r = 0; r < bs; r++) {
174     memset(dst, left[r], bs);
175     dst += stride;
176   }
177 }
178 
tm_predictor(uint8_t * dst,ptrdiff_t stride,int bs,const uint8_t * above,const uint8_t * left)179 static INLINE void tm_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
180                                 const uint8_t *above, const uint8_t *left) {
181   int r, c;
182   int ytop_left = above[-1];
183 
184   for (r = 0; r < bs; r++) {
185     for (c = 0; c < bs; c++)
186       dst[c] = clip_pixel(left[r] + above[c] - ytop_left);
187     dst += stride;
188   }
189 }
190 
dc_128_predictor(uint8_t * dst,ptrdiff_t stride,int bs,const uint8_t * above,const uint8_t * left)191 static INLINE void dc_128_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
192                                     const uint8_t *above, const uint8_t *left) {
193   int r;
194   (void)above;
195   (void)left;
196 
197   for (r = 0; r < bs; r++) {
198     memset(dst, 128, bs);
199     dst += stride;
200   }
201 }
202 
dc_left_predictor(uint8_t * dst,ptrdiff_t stride,int bs,const uint8_t * above,const uint8_t * left)203 static INLINE void dc_left_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
204                                      const uint8_t *above,
205                                      const uint8_t *left) {
206   int i, r, expected_dc, sum = 0;
207   (void)above;
208 
209   for (i = 0; i < bs; i++) sum += left[i];
210   expected_dc = (sum + (bs >> 1)) / bs;
211 
212   for (r = 0; r < bs; r++) {
213     memset(dst, expected_dc, bs);
214     dst += stride;
215   }
216 }
217 
dc_top_predictor(uint8_t * dst,ptrdiff_t stride,int bs,const uint8_t * above,const uint8_t * left)218 static INLINE void dc_top_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
219                                     const uint8_t *above, const uint8_t *left) {
220   int i, r, expected_dc, sum = 0;
221   (void)left;
222 
223   for (i = 0; i < bs; i++) sum += above[i];
224   expected_dc = (sum + (bs >> 1)) / bs;
225 
226   for (r = 0; r < bs; r++) {
227     memset(dst, expected_dc, bs);
228     dst += stride;
229   }
230 }
231 
dc_predictor(uint8_t * dst,ptrdiff_t stride,int bs,const uint8_t * above,const uint8_t * left)232 static INLINE void dc_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
233                                 const uint8_t *above, const uint8_t *left) {
234   int i, r, expected_dc, sum = 0;
235   const int count = 2 * bs;
236 
237   for (i = 0; i < bs; i++) {
238     sum += above[i];
239     sum += left[i];
240   }
241 
242   expected_dc = (sum + (count >> 1)) / count;
243 
244   for (r = 0; r < bs; r++) {
245     memset(dst, expected_dc, bs);
246     dst += stride;
247   }
248 }
249 
// 4x4 predictor in which each row is a single value: a 3-tap average centred
// on that row's `left` sample. Row 0 uses above[-1] as its upper tap and row 3
// repeats left[3] as its lower tap.
void vpx_he_predictor_4x4_c(uint8_t *dst, ptrdiff_t stride,
                            const uint8_t *above, const uint8_t *left) {
  const int top_left = above[-1];
  const int l0 = left[0];
  const int l1 = left[1];
  const int l2 = left[2];
  const int l3 = left[3];
  const int row0 = AVG3(top_left, l0, l1);
  const int row1 = AVG3(l0, l1, l2);
  const int row2 = AVG3(l1, l2, l3);
  const int row3 = AVG3(l2, l3, l3);

  memset(dst, row0, 4);
  memset(dst + stride, row1, 4);
  memset(dst + 2 * stride, row2, 4);
  memset(dst + 3 * stride, row3, 4);
}
263 
// 4x4 predictor in which every row is identical: pixel c is the 3-tap average
// of above[c - 1], above[c] and above[c + 1]. `left` is not read.
void vpx_ve_predictor_4x4_c(uint8_t *dst, ptrdiff_t stride,
                            const uint8_t *above, const uint8_t *left) {
  const int a_m1 = above[-1];
  const int a0 = above[0];
  const int a1 = above[1];
  const int a2 = above[2];
  const int a3 = above[3];
  const int a4 = above[4];
  int row;
  (void)left;

  // Build the filtered row once...
  dst[0] = AVG3(a_m1, a0, a1);
  dst[1] = AVG3(a0, a1, a2);
  dst[2] = AVG3(a1, a2, a3);
  dst[3] = AVG3(a2, a3, a4);

  // ...then replicate it into the three rows below.
  for (row = 1; row < 4; ++row) memcpy(dst + stride * row, dst, 4);
}
282 
vpx_d207_predictor_4x4_c(uint8_t * dst,ptrdiff_t stride,const uint8_t * above,const uint8_t * left)283 void vpx_d207_predictor_4x4_c(uint8_t *dst, ptrdiff_t stride,
284                               const uint8_t *above, const uint8_t *left) {
285   const int I = left[0];
286   const int J = left[1];
287   const int K = left[2];
288   const int L = left[3];
289   (void)above;
290   DST(0, 0) = AVG2(I, J);
291   DST(2, 0) = DST(0, 1) = AVG2(J, K);
292   DST(2, 1) = DST(0, 2) = AVG2(K, L);
293   DST(1, 0) = AVG3(I, J, K);
294   DST(3, 0) = DST(1, 1) = AVG3(J, K, L);
295   DST(3, 1) = DST(1, 2) = AVG3(K, L, L);
296   DST(3, 2) = DST(2, 2) = DST(0, 3) = DST(1, 3) = DST(2, 3) = DST(3, 3) = L;
297 }
298 
vpx_d63_predictor_4x4_c(uint8_t * dst,ptrdiff_t stride,const uint8_t * above,const uint8_t * left)299 void vpx_d63_predictor_4x4_c(uint8_t *dst, ptrdiff_t stride,
300                              const uint8_t *above, const uint8_t *left) {
301   const int A = above[0];
302   const int B = above[1];
303   const int C = above[2];
304   const int D = above[3];
305   const int E = above[4];
306   const int F = above[5];
307   const int G = above[6];
308   (void)left;
309   DST(0, 0) = AVG2(A, B);
310   DST(1, 0) = DST(0, 2) = AVG2(B, C);
311   DST(2, 0) = DST(1, 2) = AVG2(C, D);
312   DST(3, 0) = DST(2, 2) = AVG2(D, E);
313   DST(3, 2) = AVG2(E, F);  // differs from vp8
314 
315   DST(0, 1) = AVG3(A, B, C);
316   DST(1, 1) = DST(0, 3) = AVG3(B, C, D);
317   DST(2, 1) = DST(1, 3) = AVG3(C, D, E);
318   DST(3, 1) = DST(2, 3) = AVG3(D, E, F);
319   DST(3, 3) = AVG3(E, F, G);  // differs from vp8
320 }
321 
vpx_d63e_predictor_4x4_c(uint8_t * dst,ptrdiff_t stride,const uint8_t * above,const uint8_t * left)322 void vpx_d63e_predictor_4x4_c(uint8_t *dst, ptrdiff_t stride,
323                               const uint8_t *above, const uint8_t *left) {
324   const int A = above[0];
325   const int B = above[1];
326   const int C = above[2];
327   const int D = above[3];
328   const int E = above[4];
329   const int F = above[5];
330   const int G = above[6];
331   const int H = above[7];
332   (void)left;
333   DST(0, 0) = AVG2(A, B);
334   DST(1, 0) = DST(0, 2) = AVG2(B, C);
335   DST(2, 0) = DST(1, 2) = AVG2(C, D);
336   DST(3, 0) = DST(2, 2) = AVG2(D, E);
337   DST(3, 2) = AVG3(E, F, G);
338 
339   DST(0, 1) = AVG3(A, B, C);
340   DST(1, 1) = DST(0, 3) = AVG3(B, C, D);
341   DST(2, 1) = DST(1, 3) = AVG3(C, D, E);
342   DST(3, 1) = DST(2, 3) = AVG3(D, E, F);
343   DST(3, 3) = AVG3(F, G, H);
344 }
345 
vpx_d45_predictor_4x4_c(uint8_t * dst,ptrdiff_t stride,const uint8_t * above,const uint8_t * left)346 void vpx_d45_predictor_4x4_c(uint8_t *dst, ptrdiff_t stride,
347                              const uint8_t *above, const uint8_t *left) {
348   const int A = above[0];
349   const int B = above[1];
350   const int C = above[2];
351   const int D = above[3];
352   const int E = above[4];
353   const int F = above[5];
354   const int G = above[6];
355   const int H = above[7];
356   (void)stride;
357   (void)left;
358   DST(0, 0) = AVG3(A, B, C);
359   DST(1, 0) = DST(0, 1) = AVG3(B, C, D);
360   DST(2, 0) = DST(1, 1) = DST(0, 2) = AVG3(C, D, E);
361   DST(3, 0) = DST(2, 1) = DST(1, 2) = DST(0, 3) = AVG3(D, E, F);
362   DST(3, 1) = DST(2, 2) = DST(1, 3) = AVG3(E, F, G);
363   DST(3, 2) = DST(2, 3) = AVG3(F, G, H);
364   DST(3, 3) = H;  // differs from vp8
365 }
366 
vpx_d45e_predictor_4x4_c(uint8_t * dst,ptrdiff_t stride,const uint8_t * above,const uint8_t * left)367 void vpx_d45e_predictor_4x4_c(uint8_t *dst, ptrdiff_t stride,
368                               const uint8_t *above, const uint8_t *left) {
369   const int A = above[0];
370   const int B = above[1];
371   const int C = above[2];
372   const int D = above[3];
373   const int E = above[4];
374   const int F = above[5];
375   const int G = above[6];
376   const int H = above[7];
377   (void)stride;
378   (void)left;
379   DST(0, 0) = AVG3(A, B, C);
380   DST(1, 0) = DST(0, 1) = AVG3(B, C, D);
381   DST(2, 0) = DST(1, 1) = DST(0, 2) = AVG3(C, D, E);
382   DST(3, 0) = DST(2, 1) = DST(1, 2) = DST(0, 3) = AVG3(D, E, F);
383   DST(3, 1) = DST(2, 2) = DST(1, 3) = AVG3(E, F, G);
384   DST(3, 2) = DST(2, 3) = AVG3(F, G, H);
385   DST(3, 3) = AVG3(G, H, H);
386 }
387 
vpx_d117_predictor_4x4_c(uint8_t * dst,ptrdiff_t stride,const uint8_t * above,const uint8_t * left)388 void vpx_d117_predictor_4x4_c(uint8_t *dst, ptrdiff_t stride,
389                               const uint8_t *above, const uint8_t *left) {
390   const int I = left[0];
391   const int J = left[1];
392   const int K = left[2];
393   const int X = above[-1];
394   const int A = above[0];
395   const int B = above[1];
396   const int C = above[2];
397   const int D = above[3];
398   DST(0, 0) = DST(1, 2) = AVG2(X, A);
399   DST(1, 0) = DST(2, 2) = AVG2(A, B);
400   DST(2, 0) = DST(3, 2) = AVG2(B, C);
401   DST(3, 0) = AVG2(C, D);
402 
403   DST(0, 3) = AVG3(K, J, I);
404   DST(0, 2) = AVG3(J, I, X);
405   DST(0, 1) = DST(1, 3) = AVG3(I, X, A);
406   DST(1, 1) = DST(2, 3) = AVG3(X, A, B);
407   DST(2, 1) = DST(3, 3) = AVG3(A, B, C);
408   DST(3, 1) = AVG3(B, C, D);
409 }
410 
vpx_d135_predictor_4x4_c(uint8_t * dst,ptrdiff_t stride,const uint8_t * above,const uint8_t * left)411 void vpx_d135_predictor_4x4_c(uint8_t *dst, ptrdiff_t stride,
412                               const uint8_t *above, const uint8_t *left) {
413   const int I = left[0];
414   const int J = left[1];
415   const int K = left[2];
416   const int L = left[3];
417   const int X = above[-1];
418   const int A = above[0];
419   const int B = above[1];
420   const int C = above[2];
421   const int D = above[3];
422   (void)stride;
423   DST(0, 3) = AVG3(J, K, L);
424   DST(1, 3) = DST(0, 2) = AVG3(I, J, K);
425   DST(2, 3) = DST(1, 2) = DST(0, 1) = AVG3(X, I, J);
426   DST(3, 3) = DST(2, 2) = DST(1, 1) = DST(0, 0) = AVG3(A, X, I);
427   DST(3, 2) = DST(2, 1) = DST(1, 0) = AVG3(B, A, X);
428   DST(3, 1) = DST(2, 0) = AVG3(C, B, A);
429   DST(3, 0) = AVG3(D, C, B);
430 }
431 
vpx_d153_predictor_4x4_c(uint8_t * dst,ptrdiff_t stride,const uint8_t * above,const uint8_t * left)432 void vpx_d153_predictor_4x4_c(uint8_t *dst, ptrdiff_t stride,
433                               const uint8_t *above, const uint8_t *left) {
434   const int I = left[0];
435   const int J = left[1];
436   const int K = left[2];
437   const int L = left[3];
438   const int X = above[-1];
439   const int A = above[0];
440   const int B = above[1];
441   const int C = above[2];
442 
443   DST(0, 0) = DST(2, 1) = AVG2(I, X);
444   DST(0, 1) = DST(2, 2) = AVG2(J, I);
445   DST(0, 2) = DST(2, 3) = AVG2(K, J);
446   DST(0, 3) = AVG2(L, K);
447 
448   DST(3, 0) = AVG3(A, B, C);
449   DST(2, 0) = AVG3(X, A, B);
450   DST(1, 0) = DST(3, 1) = AVG3(I, X, A);
451   DST(1, 1) = DST(3, 2) = AVG3(J, I, X);
452   DST(1, 2) = DST(3, 3) = AVG3(K, J, I);
453   DST(1, 3) = AVG3(L, K, J);
454 }
455 
456 #if CONFIG_VP9_HIGHBITDEPTH
highbd_d207_predictor(uint16_t * dst,ptrdiff_t stride,int bs,const uint16_t * above,const uint16_t * left,int bd)457 static INLINE void highbd_d207_predictor(uint16_t *dst, ptrdiff_t stride,
458                                          int bs, const uint16_t *above,
459                                          const uint16_t *left, int bd) {
460   int r, c;
461   (void)above;
462   (void)bd;
463 
464   // First column.
465   for (r = 0; r < bs - 1; ++r) {
466     dst[r * stride] = AVG2(left[r], left[r + 1]);
467   }
468   dst[(bs - 1) * stride] = left[bs - 1];
469   dst++;
470 
471   // Second column.
472   for (r = 0; r < bs - 2; ++r) {
473     dst[r * stride] = AVG3(left[r], left[r + 1], left[r + 2]);
474   }
475   dst[(bs - 2) * stride] = AVG3(left[bs - 2], left[bs - 1], left[bs - 1]);
476   dst[(bs - 1) * stride] = left[bs - 1];
477   dst++;
478 
479   // Rest of last row.
480   for (c = 0; c < bs - 2; ++c) dst[(bs - 1) * stride + c] = left[bs - 1];
481 
482   for (r = bs - 2; r >= 0; --r) {
483     for (c = 0; c < bs - 2; ++c)
484       dst[r * stride + c] = dst[(r + 1) * stride + c - 2];
485   }
486 }
487 
highbd_d63_predictor(uint16_t * dst,ptrdiff_t stride,int bs,const uint16_t * above,const uint16_t * left,int bd)488 static INLINE void highbd_d63_predictor(uint16_t *dst, ptrdiff_t stride, int bs,
489                                         const uint16_t *above,
490                                         const uint16_t *left, int bd) {
491   int r, c;
492   int size;
493   (void)left;
494   (void)bd;
495   for (c = 0; c < bs; ++c) {
496     dst[c] = AVG2(above[c], above[c + 1]);
497     dst[stride + c] = AVG3(above[c], above[c + 1], above[c + 2]);
498   }
499   for (r = 2, size = bs - 2; r < bs; r += 2, --size) {
500     memcpy(dst + (r + 0) * stride, dst + (r >> 1), size * sizeof(*dst));
501     vpx_memset16(dst + (r + 0) * stride + size, above[bs - 1], bs - size);
502     memcpy(dst + (r + 1) * stride, dst + stride + (r >> 1),
503            size * sizeof(*dst));
504     vpx_memset16(dst + (r + 1) * stride + size, above[bs - 1], bs - size);
505   }
506 }
507 
highbd_d45_predictor(uint16_t * dst,ptrdiff_t stride,int bs,const uint16_t * above,const uint16_t * left,int bd)508 static INLINE void highbd_d45_predictor(uint16_t *dst, ptrdiff_t stride, int bs,
509                                         const uint16_t *above,
510                                         const uint16_t *left, int bd) {
511   const uint16_t above_right = above[bs - 1];
512   const uint16_t *const dst_row0 = dst;
513   int x, size;
514   (void)left;
515   (void)bd;
516 
517   for (x = 0; x < bs - 1; ++x) {
518     dst[x] = AVG3(above[x], above[x + 1], above[x + 2]);
519   }
520   dst[bs - 1] = above_right;
521   dst += stride;
522   for (x = 1, size = bs - 2; x < bs; ++x, --size) {
523     memcpy(dst, dst_row0 + x, size * sizeof(*dst));
524     vpx_memset16(dst + size, above_right, x + 1);
525     dst += stride;
526   }
527 }
528 
highbd_d117_predictor(uint16_t * dst,ptrdiff_t stride,int bs,const uint16_t * above,const uint16_t * left,int bd)529 static INLINE void highbd_d117_predictor(uint16_t *dst, ptrdiff_t stride,
530                                          int bs, const uint16_t *above,
531                                          const uint16_t *left, int bd) {
532   int r, c;
533   (void)bd;
534 
535   // first row
536   for (c = 0; c < bs; c++) dst[c] = AVG2(above[c - 1], above[c]);
537   dst += stride;
538 
539   // second row
540   dst[0] = AVG3(left[0], above[-1], above[0]);
541   for (c = 1; c < bs; c++) dst[c] = AVG3(above[c - 2], above[c - 1], above[c]);
542   dst += stride;
543 
544   // the rest of first col
545   dst[0] = AVG3(above[-1], left[0], left[1]);
546   for (r = 3; r < bs; ++r)
547     dst[(r - 2) * stride] = AVG3(left[r - 3], left[r - 2], left[r - 1]);
548 
549   // the rest of the block
550   for (r = 2; r < bs; ++r) {
551     for (c = 1; c < bs; c++) dst[c] = dst[-2 * stride + c - 1];
552     dst += stride;
553   }
554 }
555 
highbd_d135_predictor(uint16_t * dst,ptrdiff_t stride,int bs,const uint16_t * above,const uint16_t * left,int bd)556 static INLINE void highbd_d135_predictor(uint16_t *dst, ptrdiff_t stride,
557                                          int bs, const uint16_t *above,
558                                          const uint16_t *left, int bd) {
559   int i;
560 #if defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ > 7
561   // silence a spurious -Warray-bounds warning, possibly related to:
562   // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=56273
563   uint16_t border[69];
564 #else
565   uint16_t border[32 + 32 - 1];  // outer border from bottom-left to top-right
566 #endif
567   (void)bd;
568 
569   // dst(bs, bs - 2)[0], i.e., border starting at bottom-left
570   for (i = 0; i < bs - 2; ++i) {
571     border[i] = AVG3(left[bs - 3 - i], left[bs - 2 - i], left[bs - 1 - i]);
572   }
573   border[bs - 2] = AVG3(above[-1], left[0], left[1]);
574   border[bs - 1] = AVG3(left[0], above[-1], above[0]);
575   border[bs - 0] = AVG3(above[-1], above[0], above[1]);
576   // dst[0][2, size), i.e., remaining top border ascending
577   for (i = 0; i < bs - 2; ++i) {
578     border[bs + 1 + i] = AVG3(above[i], above[i + 1], above[i + 2]);
579   }
580 
581   for (i = 0; i < bs; ++i) {
582     memcpy(dst + i * stride, border + bs - 1 - i, bs * sizeof(dst[0]));
583   }
584 }
585 
highbd_d153_predictor(uint16_t * dst,ptrdiff_t stride,int bs,const uint16_t * above,const uint16_t * left,int bd)586 static INLINE void highbd_d153_predictor(uint16_t *dst, ptrdiff_t stride,
587                                          int bs, const uint16_t *above,
588                                          const uint16_t *left, int bd) {
589   int r, c;
590   (void)bd;
591   dst[0] = AVG2(above[-1], left[0]);
592   for (r = 1; r < bs; r++) dst[r * stride] = AVG2(left[r - 1], left[r]);
593   dst++;
594 
595   dst[0] = AVG3(left[0], above[-1], above[0]);
596   dst[stride] = AVG3(above[-1], left[0], left[1]);
597   for (r = 2; r < bs; r++)
598     dst[r * stride] = AVG3(left[r - 2], left[r - 1], left[r]);
599   dst++;
600 
601   for (c = 0; c < bs - 2; c++)
602     dst[c] = AVG3(above[c - 1], above[c], above[c + 1]);
603   dst += stride;
604 
605   for (r = 1; r < bs; ++r) {
606     for (c = 0; c < bs - 2; c++) dst[c] = dst[-stride + c - 2];
607     dst += stride;
608   }
609 }
610 
highbd_v_predictor(uint16_t * dst,ptrdiff_t stride,int bs,const uint16_t * above,const uint16_t * left,int bd)611 static INLINE void highbd_v_predictor(uint16_t *dst, ptrdiff_t stride, int bs,
612                                       const uint16_t *above,
613                                       const uint16_t *left, int bd) {
614   int r;
615   (void)left;
616   (void)bd;
617   for (r = 0; r < bs; r++) {
618     memcpy(dst, above, bs * sizeof(uint16_t));
619     dst += stride;
620   }
621 }
622 
highbd_h_predictor(uint16_t * dst,ptrdiff_t stride,int bs,const uint16_t * above,const uint16_t * left,int bd)623 static INLINE void highbd_h_predictor(uint16_t *dst, ptrdiff_t stride, int bs,
624                                       const uint16_t *above,
625                                       const uint16_t *left, int bd) {
626   int r;
627   (void)above;
628   (void)bd;
629   for (r = 0; r < bs; r++) {
630     vpx_memset16(dst, left[r], bs);
631     dst += stride;
632   }
633 }
634 
highbd_tm_predictor(uint16_t * dst,ptrdiff_t stride,int bs,const uint16_t * above,const uint16_t * left,int bd)635 static INLINE void highbd_tm_predictor(uint16_t *dst, ptrdiff_t stride, int bs,
636                                        const uint16_t *above,
637                                        const uint16_t *left, int bd) {
638   int r, c;
639   int ytop_left = above[-1];
640   (void)bd;
641 
642   for (r = 0; r < bs; r++) {
643     for (c = 0; c < bs; c++)
644       dst[c] = clip_pixel_highbd(left[r] + above[c] - ytop_left, bd);
645     dst += stride;
646   }
647 }
648 
highbd_dc_128_predictor(uint16_t * dst,ptrdiff_t stride,int bs,const uint16_t * above,const uint16_t * left,int bd)649 static INLINE void highbd_dc_128_predictor(uint16_t *dst, ptrdiff_t stride,
650                                            int bs, const uint16_t *above,
651                                            const uint16_t *left, int bd) {
652   int r;
653   (void)above;
654   (void)left;
655 
656   for (r = 0; r < bs; r++) {
657     vpx_memset16(dst, 128 << (bd - 8), bs);
658     dst += stride;
659   }
660 }
661 
highbd_dc_left_predictor(uint16_t * dst,ptrdiff_t stride,int bs,const uint16_t * above,const uint16_t * left,int bd)662 static INLINE void highbd_dc_left_predictor(uint16_t *dst, ptrdiff_t stride,
663                                             int bs, const uint16_t *above,
664                                             const uint16_t *left, int bd) {
665   int i, r, expected_dc, sum = 0;
666   (void)above;
667   (void)bd;
668 
669   for (i = 0; i < bs; i++) sum += left[i];
670   expected_dc = (sum + (bs >> 1)) / bs;
671 
672   for (r = 0; r < bs; r++) {
673     vpx_memset16(dst, expected_dc, bs);
674     dst += stride;
675   }
676 }
677 
highbd_dc_top_predictor(uint16_t * dst,ptrdiff_t stride,int bs,const uint16_t * above,const uint16_t * left,int bd)678 static INLINE void highbd_dc_top_predictor(uint16_t *dst, ptrdiff_t stride,
679                                            int bs, const uint16_t *above,
680                                            const uint16_t *left, int bd) {
681   int i, r, expected_dc, sum = 0;
682   (void)left;
683   (void)bd;
684 
685   for (i = 0; i < bs; i++) sum += above[i];
686   expected_dc = (sum + (bs >> 1)) / bs;
687 
688   for (r = 0; r < bs; r++) {
689     vpx_memset16(dst, expected_dc, bs);
690     dst += stride;
691   }
692 }
693 
highbd_dc_predictor(uint16_t * dst,ptrdiff_t stride,int bs,const uint16_t * above,const uint16_t * left,int bd)694 static INLINE void highbd_dc_predictor(uint16_t *dst, ptrdiff_t stride, int bs,
695                                        const uint16_t *above,
696                                        const uint16_t *left, int bd) {
697   int i, r, expected_dc, sum = 0;
698   const int count = 2 * bs;
699   (void)bd;
700 
701   for (i = 0; i < bs; i++) {
702     sum += above[i];
703     sum += left[i];
704   }
705 
706   expected_dc = (sum + (count >> 1)) / count;
707 
708   for (r = 0; r < bs; r++) {
709     vpx_memset16(dst, expected_dc, bs);
710     dst += stride;
711   }
712 }
713 
vpx_highbd_d207_predictor_4x4_c(uint16_t * dst,ptrdiff_t stride,const uint16_t * above,const uint16_t * left,int bd)714 void vpx_highbd_d207_predictor_4x4_c(uint16_t *dst, ptrdiff_t stride,
715                                      const uint16_t *above,
716                                      const uint16_t *left, int bd) {
717   const int I = left[0];
718   const int J = left[1];
719   const int K = left[2];
720   const int L = left[3];
721   (void)above;
722   (void)bd;
723   DST(0, 0) = AVG2(I, J);
724   DST(2, 0) = DST(0, 1) = AVG2(J, K);
725   DST(2, 1) = DST(0, 2) = AVG2(K, L);
726   DST(1, 0) = AVG3(I, J, K);
727   DST(3, 0) = DST(1, 1) = AVG3(J, K, L);
728   DST(3, 1) = DST(1, 2) = AVG3(K, L, L);
729   DST(3, 2) = DST(2, 2) = DST(0, 3) = DST(1, 3) = DST(2, 3) = DST(3, 3) = L;
730 }
731 
vpx_highbd_d63_predictor_4x4_c(uint16_t * dst,ptrdiff_t stride,const uint16_t * above,const uint16_t * left,int bd)732 void vpx_highbd_d63_predictor_4x4_c(uint16_t *dst, ptrdiff_t stride,
733                                     const uint16_t *above, const uint16_t *left,
734                                     int bd) {
735   const int A = above[0];
736   const int B = above[1];
737   const int C = above[2];
738   const int D = above[3];
739   const int E = above[4];
740   const int F = above[5];
741   const int G = above[6];
742   (void)left;
743   (void)bd;
744   DST(0, 0) = AVG2(A, B);
745   DST(1, 0) = DST(0, 2) = AVG2(B, C);
746   DST(2, 0) = DST(1, 2) = AVG2(C, D);
747   DST(3, 0) = DST(2, 2) = AVG2(D, E);
748   DST(3, 2) = AVG2(E, F);  // differs from vp8
749 
750   DST(0, 1) = AVG3(A, B, C);
751   DST(1, 1) = DST(0, 3) = AVG3(B, C, D);
752   DST(2, 1) = DST(1, 3) = AVG3(C, D, E);
753   DST(3, 1) = DST(2, 3) = AVG3(D, E, F);
754   DST(3, 3) = AVG3(E, F, G);  // differs from vp8
755 }
756 
vpx_highbd_d45_predictor_4x4_c(uint16_t * dst,ptrdiff_t stride,const uint16_t * above,const uint16_t * left,int bd)757 void vpx_highbd_d45_predictor_4x4_c(uint16_t *dst, ptrdiff_t stride,
758                                     const uint16_t *above, const uint16_t *left,
759                                     int bd) {
760   const int A = above[0];
761   const int B = above[1];
762   const int C = above[2];
763   const int D = above[3];
764   const int E = above[4];
765   const int F = above[5];
766   const int G = above[6];
767   const int H = above[7];
768   (void)left;
769   (void)bd;
770   DST(0, 0) = AVG3(A, B, C);
771   DST(1, 0) = DST(0, 1) = AVG3(B, C, D);
772   DST(2, 0) = DST(1, 1) = DST(0, 2) = AVG3(C, D, E);
773   DST(3, 0) = DST(2, 1) = DST(1, 2) = DST(0, 3) = AVG3(D, E, F);
774   DST(3, 1) = DST(2, 2) = DST(1, 3) = AVG3(E, F, G);
775   DST(3, 2) = DST(2, 3) = AVG3(F, G, H);
776   DST(3, 3) = H;  // differs from vp8
777 }
778 
vpx_highbd_d117_predictor_4x4_c(uint16_t * dst,ptrdiff_t stride,const uint16_t * above,const uint16_t * left,int bd)779 void vpx_highbd_d117_predictor_4x4_c(uint16_t *dst, ptrdiff_t stride,
780                                      const uint16_t *above,
781                                      const uint16_t *left, int bd) {
782   const int I = left[0];
783   const int J = left[1];
784   const int K = left[2];
785   const int X = above[-1];
786   const int A = above[0];
787   const int B = above[1];
788   const int C = above[2];
789   const int D = above[3];
790   (void)bd;
791   DST(0, 0) = DST(1, 2) = AVG2(X, A);
792   DST(1, 0) = DST(2, 2) = AVG2(A, B);
793   DST(2, 0) = DST(3, 2) = AVG2(B, C);
794   DST(3, 0) = AVG2(C, D);
795 
796   DST(0, 3) = AVG3(K, J, I);
797   DST(0, 2) = AVG3(J, I, X);
798   DST(0, 1) = DST(1, 3) = AVG3(I, X, A);
799   DST(1, 1) = DST(2, 3) = AVG3(X, A, B);
800   DST(2, 1) = DST(3, 3) = AVG3(A, B, C);
801   DST(3, 1) = AVG3(B, C, D);
802 }
803 
vpx_highbd_d135_predictor_4x4_c(uint16_t * dst,ptrdiff_t stride,const uint16_t * above,const uint16_t * left,int bd)804 void vpx_highbd_d135_predictor_4x4_c(uint16_t *dst, ptrdiff_t stride,
805                                      const uint16_t *above,
806                                      const uint16_t *left, int bd) {
807   const int I = left[0];
808   const int J = left[1];
809   const int K = left[2];
810   const int L = left[3];
811   const int X = above[-1];
812   const int A = above[0];
813   const int B = above[1];
814   const int C = above[2];
815   const int D = above[3];
816   (void)bd;
817   DST(0, 3) = AVG3(J, K, L);
818   DST(1, 3) = DST(0, 2) = AVG3(I, J, K);
819   DST(2, 3) = DST(1, 2) = DST(0, 1) = AVG3(X, I, J);
820   DST(3, 3) = DST(2, 2) = DST(1, 1) = DST(0, 0) = AVG3(A, X, I);
821   DST(3, 2) = DST(2, 1) = DST(1, 0) = AVG3(B, A, X);
822   DST(3, 1) = DST(2, 0) = AVG3(C, B, A);
823   DST(3, 0) = AVG3(D, C, B);
824 }
825 
vpx_highbd_d153_predictor_4x4_c(uint16_t * dst,ptrdiff_t stride,const uint16_t * above,const uint16_t * left,int bd)826 void vpx_highbd_d153_predictor_4x4_c(uint16_t *dst, ptrdiff_t stride,
827                                      const uint16_t *above,
828                                      const uint16_t *left, int bd) {
829   const int I = left[0];
830   const int J = left[1];
831   const int K = left[2];
832   const int L = left[3];
833   const int X = above[-1];
834   const int A = above[0];
835   const int B = above[1];
836   const int C = above[2];
837   (void)bd;
838 
839   DST(0, 0) = DST(2, 1) = AVG2(I, X);
840   DST(0, 1) = DST(2, 2) = AVG2(J, I);
841   DST(0, 2) = DST(2, 3) = AVG2(K, J);
842   DST(0, 3) = AVG2(L, K);
843 
844   DST(3, 0) = AVG3(A, B, C);
845   DST(2, 0) = AVG3(X, A, B);
846   DST(1, 0) = DST(3, 1) = AVG3(I, X, A);
847   DST(1, 1) = DST(3, 2) = AVG3(J, I, X);
848   DST(1, 2) = DST(3, 3) = AVG3(K, J, I);
849   DST(1, 3) = AVG3(L, K, J);
850 }
851 #endif  // CONFIG_VP9_HIGHBITDEPTH
852 
// Wrapper generator: expands to vpx_<mode>_predictor_<bs>x<bs>_c(), which
// forwards to the generic <mode>_predictor() with the block size filled in.
// Giving every predictor the same signature lets them all be reached through
// one function-pointer array; a given predictor does not necessarily use both
// the above and the left boundary.
#define intra_pred_sized(mode, bs)                             \
  void vpx_##mode##_predictor_##bs##x##bs##_c(                 \
      uint8_t *dst, ptrdiff_t stride, const uint8_t *above,    \
      const uint8_t *left) {                                   \
    mode##_predictor(dst, stride, bs, above, left);            \
  }
862 
#if CONFIG_VP9_HIGHBITDEPTH
// High-bitdepth counterpart of intra_pred_sized(): expands to
// vpx_highbd_<type>_predictor_<size>x<size>_c(), which forwards to
// highbd_<type>_predictor() with the block size filled in and bd passed on.
#define intra_pred_highbd_sized(type, size)                        \
  void vpx_highbd_##type##_predictor_##size##x##size##_c(          \
      uint16_t *dst, ptrdiff_t stride, const uint16_t *above,      \
      const uint16_t *left, int bd) {                              \
    highbd_##type##_predictor(dst, stride, size, above, left, bd); \
  }
#else
// Without high-bitdepth support the highbd wrappers expand to nothing, so
// the size lists below can be written once for both configurations.
#define intra_pred_highbd_sized(type, size)
#endif  // CONFIG_VP9_HIGHBITDEPTH

/* clang-format off */
// Generates the wrappers for every block size: 4, 8, 16 and 32.
#define intra_pred_allsizes(type) \
  intra_pred_sized(type, 4) \
  intra_pred_sized(type, 8) \
  intra_pred_sized(type, 16) \
  intra_pred_sized(type, 32) \
  intra_pred_highbd_sized(type, 4) \
  intra_pred_highbd_sized(type, 8) \
  intra_pred_highbd_sized(type, 16) \
  intra_pred_highbd_sized(type, 32)

// Same as intra_pred_allsizes() but without the 4x4 wrappers.
#define intra_pred_no_4x4(type) \
  intra_pred_sized(type, 8) \
  intra_pred_sized(type, 16) \
  intra_pred_sized(type, 32) \
  intra_pred_highbd_sized(type, 8) \
  intra_pred_highbd_sized(type, 16) \
  intra_pred_highbd_sized(type, 32)
902 
903 intra_pred_no_4x4(d207)
904 intra_pred_no_4x4(d63)
905 intra_pred_no_4x4(d45)
906 intra_pred_no_4x4(d117)
907 intra_pred_no_4x4(d135)
908 intra_pred_no_4x4(d153)
909 intra_pred_allsizes(v)
910 intra_pred_allsizes(h)
911 intra_pred_allsizes(tm)
912 intra_pred_allsizes(dc_128)
913 intra_pred_allsizes(dc_left)
914 intra_pred_allsizes(dc_top)
915 intra_pred_allsizes(dc)
916 /* clang-format on */
917 #undef intra_pred_allsizes
918