1 /*
2  * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3  *
4  * This source code is subject to the terms of the BSD 2 Clause License and
5  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6  * was not distributed with this source code in the LICENSE file, you can
7  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8  * Media Patent License 1.0 was not distributed with this source code in the
9  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10  */
11 
12 /*!\file
13  * \brief Describes film grain parameters and film grain synthesis
14  *
15  */
16 
17 #include <stdio.h>
18 #include <string.h>
19 #include <stdlib.h>
20 #include <assert.h>
21 #include "aom_dsp/grain_synthesis.h"
22 #include "aom_mem/aom_mem.h"
23 
// Samples from a Gaussian distribution in the range [-2048, 2047] (12 bits)
// with zero mean and a standard deviation of about 512. The values should be
// divided by 4 for the 10-bit range and by 16 for the 8-bit range.
27 static const int gaussian_sequence[2048] = {
28   56,    568,   -180,  172,   124,   -84,   172,   -64,   -900,  24,   820,
29   224,   1248,  996,   272,   -8,    -916,  -388,  -732,  -104,  -188, 800,
30   112,   -652,  -320,  -376,  140,   -252,  492,   -168,  44,    -788, 588,
31   -584,  500,   -228,  12,    680,   272,   -476,  972,   -100,  652,  368,
32   432,   -196,  -720,  -192,  1000,  -332,  652,   -136,  -552,  -604, -4,
33   192,   -220,  -136,  1000,  -52,   372,   -96,   -624,  124,   -24,  396,
34   540,   -12,   -104,  640,   464,   244,   -208,  -84,   368,   -528, -740,
35   248,   -968,  -848,  608,   376,   -60,   -292,  -40,   -156,  252,  -292,
36   248,   224,   -280,  400,   -244,  244,   -60,   76,    -80,   212,  532,
37   340,   128,   -36,   824,   -352,  -60,   -264,  -96,   -612,  416,  -704,
38   220,   -204,  640,   -160,  1220,  -408,  900,   336,   20,    -336, -96,
39   -792,  304,   48,    -28,   -1232, -1172, -448,  104,   -292,  -520, 244,
40   60,    -948,  0,     -708,  268,   108,   356,   -548,  488,   -344, -136,
41   488,   -196,  -224,  656,   -236,  -1128, 60,    4,     140,   276,  -676,
42   -376,  168,   -108,  464,   8,     564,   64,    240,   308,   -300, -400,
43   -456,  -136,  56,    120,   -408,  -116,  436,   504,   -232,  328,  844,
44   -164,  -84,   784,   -168,  232,   -224,  348,   -376,  128,   568,  96,
45   -1244, -288,  276,   848,   832,   -360,  656,   464,   -384,  -332, -356,
46   728,   -388,  160,   -192,  468,   296,   224,   140,   -776,  -100, 280,
47   4,     196,   44,    -36,   -648,  932,   16,    1428,  28,    528,  808,
48   772,   20,    268,   88,    -332,  -284,  124,   -384,  -448,  208,  -228,
49   -1044, -328,  660,   380,   -148,  -300,  588,   240,   540,   28,   136,
50   -88,   -436,  256,   296,   -1000, 1400,  0,     -48,   1056,  -136, 264,
51   -528,  -1108, 632,   -484,  -592,  -344,  796,   124,   -668,  -768, 388,
52   1296,  -232,  -188,  -200,  -288,  -4,    308,   100,   -168,  256,  -500,
53   204,   -508,  648,   -136,  372,   -272,  -120,  -1004, -552,  -548, -384,
54   548,   -296,  428,   -108,  -8,    -912,  -324,  -224,  -88,   -112, -220,
55   -100,  996,   -796,  548,   360,   -216,  180,   428,   -200,  -212, 148,
56   96,    148,   284,   216,   -412,  -320,  120,   -300,  -384,  -604, -572,
57   -332,  -8,    -180,  -176,  696,   116,   -88,   628,   76,    44,   -516,
58   240,   -208,  -40,   100,   -592,  344,   -308,  -452,  -228,  20,   916,
59   -1752, -136,  -340,  -804,  140,   40,    512,   340,   248,   184,  -492,
60   896,   -156,  932,   -628,  328,   -688,  -448,  -616,  -752,  -100, 560,
61   -1020, 180,   -800,  -64,   76,    576,   1068,  396,   660,   552,  -108,
62   -28,   320,   -628,  312,   -92,   -92,   -472,  268,   16,    560,  516,
63   -672,  -52,   492,   -100,  260,   384,   284,   292,   304,   -148, 88,
64   -152,  1012,  1064,  -228,  164,   -376,  -684,  592,   -392,  156,  196,
65   -524,  -64,   -884,  160,   -176,  636,   648,   404,   -396,  -436, 864,
66   424,   -728,  988,   -604,  904,   -592,  296,   -224,  536,   -176, -920,
67   436,   -48,   1176,  -884,  416,   -776,  -824,  -884,  524,   -548, -564,
68   -68,   -164,  -96,   692,   364,   -692,  -1012, -68,   260,   -480, 876,
69   -1116, 452,   -332,  -352,  892,   -1088, 1220,  -676,  12,    -292, 244,
70   496,   372,   -32,   280,   200,   112,   -440,  -96,   24,    -644, -184,
71   56,    -432,  224,   -980,  272,   -260,  144,   -436,  420,   356,  364,
72   -528,  76,    172,   -744,  -368,  404,   -752,  -416,  684,   -688, 72,
73   540,   416,   92,    444,   480,   -72,   -1416, 164,   -1172, -68,  24,
74   424,   264,   1040,  128,   -912,  -524,  -356,  64,    876,   -12,  4,
75   -88,   532,   272,   -524,  320,   276,   -508,  940,   24,    -400, -120,
76   756,   60,    236,   -412,  100,   376,   -484,  400,   -100,  -740, -108,
77   -260,  328,   -268,  224,   -200,  -416,  184,   -604,  -564,  -20,  296,
78   60,    892,   -888,  60,    164,   68,    -760,  216,   -296,  904,  -336,
79   -28,   404,   -356,  -568,  -208,  -1480, -512,  296,   328,   -360, -164,
80   -1560, -776,  1156,  -428,  164,   -504,  -112,  120,   -216,  -148, -264,
81   308,   32,    64,    -72,   72,    116,   176,   -64,   -272,  460,  -536,
82   -784,  -280,  348,   108,   -752,  -132,  524,   -540,  -776,  116,  -296,
83   -1196, -288,  -560,  1040,  -472,  116,   -848,  -1116, 116,   636,  696,
84   284,   -176,  1016,  204,   -864,  -648,  -248,  356,   972,   -584, -204,
85   264,   880,   528,   -24,   -184,  116,   448,   -144,  828,   524,  212,
86   -212,  52,    12,    200,   268,   -488,  -404,  -880,  824,   -672, -40,
87   908,   -248,  500,   716,   -576,  492,   -576,  16,    720,   -108, 384,
88   124,   344,   280,   576,   -500,  252,   104,   -308,  196,   -188, -8,
89   1268,  296,   1032,  -1196, 436,   316,   372,   -432,  -200,  -660, 704,
90   -224,  596,   -132,  268,   32,    -452,  884,   104,   -1008, 424,  -1348,
91   -280,  4,     -1168, 368,   476,   696,   300,   -8,    24,    180,  -592,
92   -196,  388,   304,   500,   724,   -160,  244,   -84,   272,   -256, -420,
93   320,   208,   -144,  -156,  156,   364,   452,   28,    540,   316,  220,
94   -644,  -248,  464,   72,    360,   32,    -388,  496,   -680,  -48,  208,
95   -116,  -408,  60,    -604,  -392,  548,   -840,  784,   -460,  656,  -544,
96   -388,  -264,  908,   -800,  -628,  -612,  -568,  572,   -220,  164,  288,
97   -16,   -308,  308,   -112,  -636,  -760,  280,   -668,  432,   364,  240,
98   -196,  604,   340,   384,   196,   592,   -44,   -500,  432,   -580, -132,
99   636,   -76,   392,   4,     -412,  540,   508,   328,   -356,  -36,  16,
100   -220,  -64,   -248,  -60,   24,    -192,  368,   1040,  92,    -24,  -1044,
101   -32,   40,    104,   148,   192,   -136,  -520,  56,    -816,  -224, 732,
102   392,   356,   212,   -80,   -424,  -1008, -324,  588,   -1496, 576,  460,
103   -816,  -848,  56,    -580,  -92,   -1372, -112,  -496,  200,   364,  52,
104   -140,  48,    -48,   -60,   84,    72,    40,    132,   -356,  -268, -104,
105   -284,  -404,  732,   -520,  164,   -304,  -540,  120,   328,   -76,  -460,
106   756,   388,   588,   236,   -436,  -72,   -176,  -404,  -316,  -148, 716,
107   -604,  404,   -72,   -88,   -888,  -68,   944,   88,    -220,  -344, 960,
108   472,   460,   -232,  704,   120,   832,   -228,  692,   -508,  132,  -476,
109   844,   -748,  -364,  -44,   1116,  -1104, -1056, 76,    428,   552,  -692,
110   60,    356,   96,    -384,  -188,  -612,  -576,  736,   508,   892,  352,
111   -1132, 504,   -24,   -352,  324,   332,   -600,  -312,  292,   508,  -144,
112   -8,    484,   48,    284,   -260,  -240,  256,   -100,  -292,  -204, -44,
113   472,   -204,  908,   -188,  -1000, -256,  92,    1164,  -392,  564,  356,
114   652,   -28,   -884,  256,   484,   -192,  760,   -176,  376,   -524, -452,
115   -436,  860,   -736,  212,   124,   504,   -476,  468,   76,    -472, 552,
116   -692,  -944,  -620,  740,   -240,  400,   132,   20,    192,   -196, 264,
117   -668,  -1012, -60,   296,   -316,  -828,  76,    -156,  284,   -768, -448,
118   -832,  148,   248,   652,   616,   1236,  288,   -328,  -400,  -124, 588,
119   220,   520,   -696,  1032,  768,   -740,  -92,   -272,  296,   448,  -464,
120   412,   -200,  392,   440,   -200,  264,   -152,  -260,  320,   1032, 216,
121   320,   -8,    -64,   156,   -1016, 1084,  1172,  536,   484,   -432, 132,
122   372,   -52,   -256,  84,    116,   -352,  48,    116,   304,   -384, 412,
123   924,   -300,  528,   628,   180,   648,   44,    -980,  -220,  1320, 48,
124   332,   748,   524,   -268,  -720,  540,   -276,  564,   -344,  -208, -196,
125   436,   896,   88,    -392,  132,   80,    -964,  -288,  568,   56,   -48,
126   -456,  888,   8,     552,   -156,  -292,  948,   288,   128,   -716, -292,
127   1192,  -152,  876,   352,   -600,  -260,  -812,  -468,  -28,   -120, -32,
128   -44,   1284,  496,   192,   464,   312,   -76,   -516,  -380,  -456, -1012,
129   -48,   308,   -156,  36,    492,   -156,  -808,  188,   1652,  68,   -120,
130   -116,  316,   160,   -140,  352,   808,   -416,  592,   316,   -480, 56,
131   528,   -204,  -568,  372,   -232,  752,   -344,  744,   -4,    324,  -416,
132   -600,  768,   268,   -248,  -88,   -132,  -420,  -432,  80,    -288, 404,
133   -316,  -1216, -588,  520,   -108,  92,    -320,  368,   -480,  -216, -92,
134   1688,  -300,  180,   1020,  -176,  820,   -68,   -228,  -260,  436,  -904,
135   20,    40,    -508,  440,   -736,  312,   332,   204,   760,   -372, 728,
136   96,    -20,   -632,  -520,  -560,  336,   1076,  -64,   -532,  776,  584,
137   192,   396,   -728,  -520,  276,   -188,  80,    -52,   -612,  -252, -48,
138   648,   212,   -688,  228,   -52,   -260,  428,   -412,  -272,  -404, 180,
139   816,   -796,  48,    152,   484,   -88,   -216,  988,   696,   188,  -528,
140   648,   -116,  -180,  316,   476,   12,    -564,  96,    476,   -252, -364,
141   -376,  -392,  556,   -256,  -576,  260,   -352,  120,   -16,   -136, -260,
142   -492,  72,    556,   660,   580,   616,   772,   436,   424,   -32,  -324,
143   -1268, 416,   -324,  -80,   920,   160,   228,   724,   32,    -516, 64,
144   384,   68,    -128,  136,   240,   248,   -204,  -68,   252,   -932, -120,
145   -480,  -628,  -84,   192,   852,   -404,  -288,  -132,  204,   100,  168,
146   -68,   -196,  -868,  460,   1080,  380,   -80,   244,   0,     484,  -888,
147   64,    184,   352,   600,   460,   164,   604,   -196,  320,   -64,  588,
148   -184,  228,   12,    372,   48,    -848,  -344,  224,   208,   -200, 484,
149   128,   -20,   272,   -468,  -840,  384,   256,   -720,  -520,  -464, -580,
150   112,   -120,  644,   -356,  -208,  -608,  -528,  704,   560,   -424, 392,
151   828,   40,    84,    200,   -152,  0,     -144,  584,   280,   -120, 80,
152   -556,  -972,  -196,  -472,  724,   80,    168,   -32,   88,    160,  -688,
153   0,     160,   356,   372,   -776,  740,   -128,  676,   -248,  -480, 4,
154   -364,  96,    544,   232,   -1032, 956,   236,   356,   20,    -40,  300,
155   24,    -676,  -596,  132,   1120,  -104,  532,   -1096, 568,   648,  444,
156   508,   380,   188,   -376,  -604,  1488,  424,   24,    756,   -220, -192,
157   716,   120,   920,   688,   168,   44,    -460,  568,   284,   1144, 1160,
158   600,   424,   888,   656,   -356,  -320,  220,   316,   -176,  -724, -188,
159   -816,  -628,  -348,  -228,  -380,  1012,  -452,  -660,  736,   928,  404,
160   -696,  -72,   -268,  -892,  128,   184,   -344,  -780,  360,   336,  400,
161   344,   428,   548,   -112,  136,   -228,  -216,  -820,  -516,  340,  92,
162   -136,  116,   -300,  376,   -244,  100,   -316,  -520,  -284,  -12,  824,
163   164,   -548,  -180,  -128,  116,   -924,  -828,  268,   -368,  -580, 620,
164   192,   160,   0,     -1676, 1068,  424,   -56,   -360,  468,   -156, 720,
165   288,   -528,  556,   -364,  548,   -148,  504,   316,   152,   -648, -620,
166   -684,  -24,   -376,  -384,  -108,  -920,  -1032, 768,   180,   -264, -508,
167   -1268, -260,  -60,   300,   -240,  988,   724,   -376,  -576,  -212, -736,
168   556,   192,   1092,  -620,  -880,  376,   -56,   -4,    -216,  -32,  836,
169   268,   396,   1332,  864,   -600,  100,   56,    -412,  -92,   356,  180,
170   884,   -468,  -436,  292,   -388,  -804,  -704,  -840,  368,   -348, 140,
171   -724,  1536,  940,   372,   112,   -372,  436,   -480,  1136,  296,  -32,
172   -228,  132,   -48,   -220,  868,   -1016, -60,   -1044, -464,  328,  916,
173   244,   12,    -736,  -296,  360,   468,   -376,  -108,  -92,   788,  368,
174   -56,   544,   400,   -672,  -420,  728,   16,    320,   44,    -284, -380,
175   -796,  488,   132,   204,   -596,  -372,  88,    -152,  -908,  -636, -572,
176   -624,  -116,  -692,  -200,  -56,   276,   -88,   484,   -324,  948,  864,
177   1000,  -456,  -184,  -276,  292,   -296,  156,   676,   320,   160,  908,
178   -84,   -1236, -288,  -116,  260,   -372,  -644,  732,   -756,  -96,  84,
179   344,   -520,  348,   -688,  240,   -84,   216,   -1044, -136,  -676, -396,
180   -1500, 960,   -40,   176,   168,   1516,  420,   -504,  -344,  -364, -360,
181   1216,  -940,  -380,  -212,  252,   -660,  -708,  484,   -444,  -152, 928,
182   -120,  1112,  476,   -260,  560,   -148,  -344,  108,   -196,  228,  -288,
183   504,   560,   -328,  -88,   288,   -1008, 460,   -228,  468,   -836, -196,
184   76,    388,   232,   412,   -1168, -716,  -644,  756,   -172,  -356, -504,
185   116,   432,   528,   48,    476,   -168,  -608,  448,   160,   -532, -272,
186   28,    -676,  -12,   828,   980,   456,   520,   104,   -104,  256,  -344,
187   -4,    -28,   -368,  -52,   -524,  -572,  -556,  -200,  768,   1124, -208,
188   -512,  176,   232,   248,   -148,  -888,  604,   -600,  -304,  804,  -156,
189   -212,  488,   -192,  -804,  -256,  368,   -360,  -916,  -328,  228,  -240,
190   -448,  -472,  856,   -556,  -364,  572,   -12,   -156,  -368,  -340, 432,
191   252,   -752,  -152,  288,   268,   -580,  -848,  -592,  108,   -76,  244,
192   312,   -716,  592,   -80,   436,   360,   4,     -248,  160,   516,  584,
193   732,   44,    -468,  -280,  -292,  -156,  -588,  28,    308,   912,  24,
194   124,   156,   180,   -252,  944,   -924,  -772,  -520,  -428,  -624, 300,
195   -212,  -1144, 32,    -724,  800,   -1128, -212,  -1288, -848,  180,  -416,
196   440,   192,   -576,  -792,  -76,   -1080, 80,    -532,  -352,  -132, 380,
197   -820,  148,   1112,  128,   164,   456,   700,   -924,  144,   -668, -384,
198   648,   -832,  508,   552,   -52,   -100,  -656,  208,   -568,  748,  -88,
199   680,   232,   300,   192,   -408,  -1012, -152,  -252,  -268,  272,  -876,
200   -664,  -648,  -332,  -136,  16,    12,    1152,  -28,   332,   -536, 320,
201   -672,  -460,  -316,  532,   -260,  228,   -40,   1052,  -816,  180,  88,
202   -496,  -556,  -672,  -368,  428,   92,    356,   404,   -408,  252,  196,
203   -176,  -556,  792,   268,   32,    372,   40,    96,    -332,  328,  120,
204   372,   -900,  -40,   472,   -264,  -592,  952,   128,   656,   112,  664,
205   -232,  420,   4,     -344,  -464,  556,   244,   -416,  -32,   252,  0,
206   -412,  188,   -696,  508,   -476,  324,   -1096, 656,   -312,  560,  264,
207   -136,  304,   160,   -64,   -580,  248,   336,   -720,  560,   -348, -288,
208   -276,  -196,  -500,  852,   -544,  -236,  -1128, -992,  -776,  116,  56,
209   52,    860,   884,   212,   -12,   168,   1020,  512,   -552,  924,  -148,
210   716,   188,   164,   -340,  -520,  -184,  880,   -152,  -680,  -208, -1156,
211   -300,  -528,  -472,  364,   100,   -744,  -1056, -32,   540,   280,  144,
212   -676,  -32,   -232,  -280,  -224,  96,    568,   -76,   172,   148,  148,
213   104,   32,    -296,  -32,   788,   -80,   32,    -16,   280,   288,  944,
214   428,   -484
215 };
216 
// Number of random bits requested from get_random_number() for each grain
// sample; the result indexes gaussian_sequence (2^11 == 2048 entries).
static const int gauss_bits = 11;

// Luma sub-block dimensions. The height is used by init_arrays() to size the
// luma column buffer.
static int luma_subblock_size_y = 32;
static int luma_subblock_size_x = 32;

// Chroma sub-block dimensions. The height is used by init_arrays() to size
// the chroma column buffers.
static int chroma_subblock_size_y = 16;
static int chroma_subblock_size_x = 16;

// NOTE(review): presumably the limited ("legal") range bounds for 8-bit luma
// samples -- confirm against the code that consumes them.
static const int min_luma_legal_range = 16;
static const int max_luma_legal_range = 235;

// NOTE(review): presumably the limited ("legal") range bounds for 8-bit
// chroma samples -- confirm against the code that consumes them.
static const int min_chroma_legal_range = 16;
static const int max_chroma_legal_range = 240;

// Per-component scaling look-up tables, 256 entries each. init_arrays()
// clears them to zero.
static int scaling_lut_y[256];
static int scaling_lut_cb[256];
static int scaling_lut_cr[256];

// Lower and upper clamp bounds applied to grain values after the
// auto-regressive filtering step.
static int grain_min;
static int grain_max;

// 16-bit state shared by init_random_generator() and get_random_number().
static uint16_t random_register = 0;  // random number generator register
239 
init_arrays(const aom_film_grain_t * params,int luma_stride,int chroma_stride,int *** pred_pos_luma_p,int *** pred_pos_chroma_p,int ** luma_grain_block,int ** cb_grain_block,int ** cr_grain_block,int ** y_line_buf,int ** cb_line_buf,int ** cr_line_buf,int ** y_col_buf,int ** cb_col_buf,int ** cr_col_buf,int luma_grain_samples,int chroma_grain_samples,int chroma_subsamp_y,int chroma_subsamp_x)240 static void init_arrays(const aom_film_grain_t *params, int luma_stride,
241                         int chroma_stride, int ***pred_pos_luma_p,
242                         int ***pred_pos_chroma_p, int **luma_grain_block,
243                         int **cb_grain_block, int **cr_grain_block,
244                         int **y_line_buf, int **cb_line_buf, int **cr_line_buf,
245                         int **y_col_buf, int **cb_col_buf, int **cr_col_buf,
246                         int luma_grain_samples, int chroma_grain_samples,
247                         int chroma_subsamp_y, int chroma_subsamp_x) {
248   memset(scaling_lut_y, 0, sizeof(*scaling_lut_y) * 256);
249   memset(scaling_lut_cb, 0, sizeof(*scaling_lut_cb) * 256);
250   memset(scaling_lut_cr, 0, sizeof(*scaling_lut_cr) * 256);
251 
252   int num_pos_luma = 2 * params->ar_coeff_lag * (params->ar_coeff_lag + 1);
253   int num_pos_chroma = num_pos_luma;
254   if (params->num_y_points > 0) ++num_pos_chroma;
255 
256   int **pred_pos_luma;
257   int **pred_pos_chroma;
258 
259   pred_pos_luma = (int **)aom_malloc(sizeof(*pred_pos_luma) * num_pos_luma);
260 
261   for (int row = 0; row < num_pos_luma; row++) {
262     pred_pos_luma[row] = (int *)aom_malloc(sizeof(**pred_pos_luma) * 3);
263   }
264 
265   pred_pos_chroma =
266       (int **)aom_malloc(sizeof(*pred_pos_chroma) * num_pos_chroma);
267 
268   for (int row = 0; row < num_pos_chroma; row++) {
269     pred_pos_chroma[row] = (int *)aom_malloc(sizeof(**pred_pos_chroma) * 3);
270   }
271 
272   int pos_ar_index = 0;
273 
274   for (int row = -params->ar_coeff_lag; row < 0; row++) {
275     for (int col = -params->ar_coeff_lag; col < params->ar_coeff_lag + 1;
276          col++) {
277       pred_pos_luma[pos_ar_index][0] = row;
278       pred_pos_luma[pos_ar_index][1] = col;
279       pred_pos_luma[pos_ar_index][2] = 0;
280 
281       pred_pos_chroma[pos_ar_index][0] = row;
282       pred_pos_chroma[pos_ar_index][1] = col;
283       pred_pos_chroma[pos_ar_index][2] = 0;
284       ++pos_ar_index;
285     }
286   }
287 
288   for (int col = -params->ar_coeff_lag; col < 0; col++) {
289     pred_pos_luma[pos_ar_index][0] = 0;
290     pred_pos_luma[pos_ar_index][1] = col;
291     pred_pos_luma[pos_ar_index][2] = 0;
292 
293     pred_pos_chroma[pos_ar_index][0] = 0;
294     pred_pos_chroma[pos_ar_index][1] = col;
295     pred_pos_chroma[pos_ar_index][2] = 0;
296 
297     ++pos_ar_index;
298   }
299 
300   if (params->num_y_points > 0) {
301     pred_pos_chroma[pos_ar_index][0] = 0;
302     pred_pos_chroma[pos_ar_index][1] = 0;
303     pred_pos_chroma[pos_ar_index][2] = 1;
304   }
305 
306   *pred_pos_luma_p = pred_pos_luma;
307   *pred_pos_chroma_p = pred_pos_chroma;
308 
309   *y_line_buf = (int *)aom_malloc(sizeof(**y_line_buf) * luma_stride * 2);
310   *cb_line_buf = (int *)aom_malloc(sizeof(**cb_line_buf) * chroma_stride *
311                                    (2 >> chroma_subsamp_y));
312   *cr_line_buf = (int *)aom_malloc(sizeof(**cr_line_buf) * chroma_stride *
313                                    (2 >> chroma_subsamp_y));
314 
315   *y_col_buf =
316       (int *)aom_malloc(sizeof(**y_col_buf) * (luma_subblock_size_y + 2) * 2);
317   *cb_col_buf =
318       (int *)aom_malloc(sizeof(**cb_col_buf) *
319                         (chroma_subblock_size_y + (2 >> chroma_subsamp_y)) *
320                         (2 >> chroma_subsamp_x));
321   *cr_col_buf =
322       (int *)aom_malloc(sizeof(**cr_col_buf) *
323                         (chroma_subblock_size_y + (2 >> chroma_subsamp_y)) *
324                         (2 >> chroma_subsamp_x));
325 
326   *luma_grain_block =
327       (int *)aom_malloc(sizeof(**luma_grain_block) * luma_grain_samples);
328   *cb_grain_block =
329       (int *)aom_malloc(sizeof(**cb_grain_block) * chroma_grain_samples);
330   *cr_grain_block =
331       (int *)aom_malloc(sizeof(**cr_grain_block) * chroma_grain_samples);
332 }
333 
dealloc_arrays(const aom_film_grain_t * params,int *** pred_pos_luma,int *** pred_pos_chroma,int ** luma_grain_block,int ** cb_grain_block,int ** cr_grain_block,int ** y_line_buf,int ** cb_line_buf,int ** cr_line_buf,int ** y_col_buf,int ** cb_col_buf,int ** cr_col_buf)334 static void dealloc_arrays(const aom_film_grain_t *params, int ***pred_pos_luma,
335                            int ***pred_pos_chroma, int **luma_grain_block,
336                            int **cb_grain_block, int **cr_grain_block,
337                            int **y_line_buf, int **cb_line_buf,
338                            int **cr_line_buf, int **y_col_buf, int **cb_col_buf,
339                            int **cr_col_buf) {
340   int num_pos_luma = 2 * params->ar_coeff_lag * (params->ar_coeff_lag + 1);
341   int num_pos_chroma = num_pos_luma;
342   if (params->num_y_points > 0) ++num_pos_chroma;
343 
344   for (int row = 0; row < num_pos_luma; row++) {
345     aom_free((*pred_pos_luma)[row]);
346   }
347   aom_free(*pred_pos_luma);
348 
349   for (int row = 0; row < num_pos_chroma; row++) {
350     aom_free((*pred_pos_chroma)[row]);
351   }
352   aom_free((*pred_pos_chroma));
353 
354   aom_free(*y_line_buf);
355 
356   aom_free(*cb_line_buf);
357 
358   aom_free(*cr_line_buf);
359 
360   aom_free(*y_col_buf);
361 
362   aom_free(*cb_col_buf);
363 
364   aom_free(*cr_col_buf);
365 
366   aom_free(*luma_grain_block);
367 
368   aom_free(*cb_grain_block);
369 
370   aom_free(*cr_grain_block);
371 }
372 
373 // get a number between 0 and 2^bits - 1
get_random_number(int bits)374 static INLINE int get_random_number(int bits) {
375   uint16_t bit;
376   bit = ((random_register >> 0) ^ (random_register >> 1) ^
377          (random_register >> 3) ^ (random_register >> 12)) &
378         1;
379   random_register = (random_register >> 1) | (bit << 15);
380   return (random_register >> (16 - bits)) & ((1 << bits) - 1);
381 }
382 
init_random_generator(int luma_line,uint16_t seed)383 static void init_random_generator(int luma_line, uint16_t seed) {
384   // same for the picture
385 
386   uint16_t msb = (seed >> 8) & 255;
387   uint16_t lsb = seed & 255;
388 
389   random_register = (msb << 8) + lsb;
390 
391   //  changes for each row
392   int luma_num = luma_line >> 5;
393 
394   random_register ^= ((luma_num * 37 + 178) & 255) << 8;
395   random_register ^= ((luma_num * 173 + 105) & 255);
396 }
397 
398 // Return 0 for success, -1 for failure
generate_luma_grain_block(const aom_film_grain_t * params,int ** pred_pos_luma,int * luma_grain_block,int luma_block_size_y,int luma_block_size_x,int luma_grain_stride,int left_pad,int top_pad,int right_pad,int bottom_pad)399 static int generate_luma_grain_block(
400     const aom_film_grain_t *params, int **pred_pos_luma, int *luma_grain_block,
401     int luma_block_size_y, int luma_block_size_x, int luma_grain_stride,
402     int left_pad, int top_pad, int right_pad, int bottom_pad) {
403   if (params->num_y_points == 0) {
404     memset(luma_grain_block, 0,
405            sizeof(*luma_grain_block) * luma_block_size_y * luma_grain_stride);
406     return 0;
407   }
408 
409   int bit_depth = params->bit_depth;
410   int gauss_sec_shift = 12 - bit_depth + params->grain_scale_shift;
411 
412   int num_pos_luma = 2 * params->ar_coeff_lag * (params->ar_coeff_lag + 1);
413   int rounding_offset = (1 << (params->ar_coeff_shift - 1));
414 
415   for (int i = 0; i < luma_block_size_y; i++)
416     for (int j = 0; j < luma_block_size_x; j++)
417       luma_grain_block[i * luma_grain_stride + j] =
418           (gaussian_sequence[get_random_number(gauss_bits)] +
419            ((1 << gauss_sec_shift) >> 1)) >>
420           gauss_sec_shift;
421 
422   for (int i = top_pad; i < luma_block_size_y - bottom_pad; i++)
423     for (int j = left_pad; j < luma_block_size_x - right_pad; j++) {
424       int wsum = 0;
425       for (int pos = 0; pos < num_pos_luma; pos++) {
426         wsum = wsum + params->ar_coeffs_y[pos] *
427                           luma_grain_block[(i + pred_pos_luma[pos][0]) *
428                                                luma_grain_stride +
429                                            j + pred_pos_luma[pos][1]];
430       }
431       luma_grain_block[i * luma_grain_stride + j] =
432           clamp(luma_grain_block[i * luma_grain_stride + j] +
433                     ((wsum + rounding_offset) >> params->ar_coeff_shift),
434                 grain_min, grain_max);
435     }
436   return 0;
437 }
438 
439 // Return 0 for success, -1 for failure
generate_chroma_grain_blocks(const aom_film_grain_t * params,int ** pred_pos_chroma,int * luma_grain_block,int * cb_grain_block,int * cr_grain_block,int luma_grain_stride,int chroma_block_size_y,int chroma_block_size_x,int chroma_grain_stride,int left_pad,int top_pad,int right_pad,int bottom_pad,int chroma_subsamp_y,int chroma_subsamp_x)440 static int generate_chroma_grain_blocks(
441     const aom_film_grain_t *params,
442     //                                  int** pred_pos_luma,
443     int **pred_pos_chroma, int *luma_grain_block, int *cb_grain_block,
444     int *cr_grain_block, int luma_grain_stride, int chroma_block_size_y,
445     int chroma_block_size_x, int chroma_grain_stride, int left_pad, int top_pad,
446     int right_pad, int bottom_pad, int chroma_subsamp_y, int chroma_subsamp_x) {
447   int bit_depth = params->bit_depth;
448   int gauss_sec_shift = 12 - bit_depth + params->grain_scale_shift;
449 
450   int num_pos_chroma = 2 * params->ar_coeff_lag * (params->ar_coeff_lag + 1);
451   if (params->num_y_points > 0) ++num_pos_chroma;
452   int rounding_offset = (1 << (params->ar_coeff_shift - 1));
453   int chroma_grain_block_size = chroma_block_size_y * chroma_grain_stride;
454 
455   if (params->num_cb_points || params->chroma_scaling_from_luma) {
456     init_random_generator(7 << 5, params->random_seed);
457 
458     for (int i = 0; i < chroma_block_size_y; i++)
459       for (int j = 0; j < chroma_block_size_x; j++)
460         cb_grain_block[i * chroma_grain_stride + j] =
461             (gaussian_sequence[get_random_number(gauss_bits)] +
462              ((1 << gauss_sec_shift) >> 1)) >>
463             gauss_sec_shift;
464   } else {
465     memset(cb_grain_block, 0,
466            sizeof(*cb_grain_block) * chroma_grain_block_size);
467   }
468 
469   if (params->num_cr_points || params->chroma_scaling_from_luma) {
470     init_random_generator(11 << 5, params->random_seed);
471 
472     for (int i = 0; i < chroma_block_size_y; i++)
473       for (int j = 0; j < chroma_block_size_x; j++)
474         cr_grain_block[i * chroma_grain_stride + j] =
475             (gaussian_sequence[get_random_number(gauss_bits)] +
476              ((1 << gauss_sec_shift) >> 1)) >>
477             gauss_sec_shift;
478   } else {
479     memset(cr_grain_block, 0,
480            sizeof(*cr_grain_block) * chroma_grain_block_size);
481   }
482 
483   for (int i = top_pad; i < chroma_block_size_y - bottom_pad; i++)
484     for (int j = left_pad; j < chroma_block_size_x - right_pad; j++) {
485       int wsum_cb = 0;
486       int wsum_cr = 0;
487       for (int pos = 0; pos < num_pos_chroma; pos++) {
488         if (pred_pos_chroma[pos][2] == 0) {
489           wsum_cb = wsum_cb + params->ar_coeffs_cb[pos] *
490                                   cb_grain_block[(i + pred_pos_chroma[pos][0]) *
491                                                      chroma_grain_stride +
492                                                  j + pred_pos_chroma[pos][1]];
493           wsum_cr = wsum_cr + params->ar_coeffs_cr[pos] *
494                                   cr_grain_block[(i + pred_pos_chroma[pos][0]) *
495                                                      chroma_grain_stride +
496                                                  j + pred_pos_chroma[pos][1]];
497         } else if (pred_pos_chroma[pos][2] == 1) {
498           int av_luma = 0;
499           int luma_coord_y = ((i - top_pad) << chroma_subsamp_y) + top_pad;
500           int luma_coord_x = ((j - left_pad) << chroma_subsamp_x) + left_pad;
501 
502           for (int k = luma_coord_y; k < luma_coord_y + chroma_subsamp_y + 1;
503                k++)
504             for (int l = luma_coord_x; l < luma_coord_x + chroma_subsamp_x + 1;
505                  l++)
506               av_luma += luma_grain_block[k * luma_grain_stride + l];
507 
508           av_luma =
509               (av_luma + ((1 << (chroma_subsamp_y + chroma_subsamp_x)) >> 1)) >>
510               (chroma_subsamp_y + chroma_subsamp_x);
511 
512           wsum_cb = wsum_cb + params->ar_coeffs_cb[pos] * av_luma;
513           wsum_cr = wsum_cr + params->ar_coeffs_cr[pos] * av_luma;
514         } else {
515           fprintf(
516               stderr,
517               "Grain synthesis: prediction between two chroma components is "
518               "not supported!");
519           return -1;
520         }
521       }
522       if (params->num_cb_points || params->chroma_scaling_from_luma)
523         cb_grain_block[i * chroma_grain_stride + j] =
524             clamp(cb_grain_block[i * chroma_grain_stride + j] +
525                       ((wsum_cb + rounding_offset) >> params->ar_coeff_shift),
526                   grain_min, grain_max);
527       if (params->num_cr_points || params->chroma_scaling_from_luma)
528         cr_grain_block[i * chroma_grain_stride + j] =
529             clamp(cr_grain_block[i * chroma_grain_stride + j] +
530                       ((wsum_cr + rounding_offset) >> params->ar_coeff_shift),
531                   grain_min, grain_max);
532     }
533   return 0;
534 }
535 
// Builds a 256-entry scaling look-up table from a piecewise-linear function.
//
// scaling_points  array of {x, y} pairs; x is the LUT index and y the scaling
//                 value at that index. The x values are used directly as
//                 indices into scaling_lut, so they must lie in [0, 255].
// num_points      number of valid pairs. Zero or a negative count leaves
//                 scaling_lut untouched.
// scaling_lut     output table with at least 256 entries.
//
// Entries below the first point take the first point's value, entries from
// the last point onwards take the last point's value, and entries in between
// are linearly interpolated in 16.16 fixed point. A segment whose end point
// does not lie strictly to the right of its start point (duplicate or
// out-of-order x) covers no entries and is skipped; this also avoids the
// division by zero such a segment would otherwise trigger.
static void init_scaling_function(const int scaling_points[][2], int num_points,
                                  int scaling_lut[]) {
  if (num_points <= 0) return;

  for (int i = 0; i < scaling_points[0][0]; i++)
    scaling_lut[i] = scaling_points[0][1];

  for (int point = 0; point < num_points - 1; point++) {
    int delta_y = scaling_points[point + 1][1] - scaling_points[point][1];
    int delta_x = scaling_points[point + 1][0] - scaling_points[point][0];

    // Empty segment: nothing to fill, and the slope would be undefined.
    if (delta_x <= 0) continue;

    // Slope per LUT step in 16.16 fixed point (rounded reciprocal of delta_x).
    int64_t delta = delta_y * ((65536 + (delta_x >> 1)) / delta_x);

    for (int x = 0; x < delta_x; x++) {
      scaling_lut[scaling_points[point][0] + x] =
          scaling_points[point][1] + (int)((x * delta + 32768) >> 16);
    }
  }

  for (int i = scaling_points[num_points - 1][0]; i < 256; i++)
    scaling_lut[i] = scaling_points[num_points - 1][1];
}
558 
// Reads a scaling value from a 256-entry look-up table for a sample of the
// given bit depth.
//
// For 8-bit input the sample value is the table index. For deeper input the
// upper 8 bits select the table entry and the remaining low bits are used to
// interpolate linearly (with rounding) towards the next entry; the last
// entry (255) is returned as is because it has no successor.
static int scale_LUT(int *scaling_lut, int index, int bit_depth) {
  const int extra_bits = bit_depth - 8;
  const int x = index >> extra_bits;

  if (extra_bits == 0 || x == 255) return scaling_lut[x];

  const int fraction = index & ((1 << extra_bits) - 1);
  const int step = scaling_lut[x + 1] - scaling_lut[x];
  const int half = 1 << (extra_bits - 1);

  return scaling_lut[x] + ((step * fraction + half) >> extra_bits);
}
572 
add_noise_to_block(const aom_film_grain_t * params,uint8_t * luma,uint8_t * cb,uint8_t * cr,int luma_stride,int chroma_stride,int * luma_grain,int * cb_grain,int * cr_grain,int luma_grain_stride,int chroma_grain_stride,int half_luma_height,int half_luma_width,int bit_depth,int chroma_subsamp_y,int chroma_subsamp_x,int mc_identity)573 static void add_noise_to_block(const aom_film_grain_t *params, uint8_t *luma,
574                                uint8_t *cb, uint8_t *cr, int luma_stride,
575                                int chroma_stride, int *luma_grain,
576                                int *cb_grain, int *cr_grain,
577                                int luma_grain_stride, int chroma_grain_stride,
578                                int half_luma_height, int half_luma_width,
579                                int bit_depth, int chroma_subsamp_y,
580                                int chroma_subsamp_x, int mc_identity) {
581   int cb_mult = params->cb_mult - 128;            // fixed scale
582   int cb_luma_mult = params->cb_luma_mult - 128;  // fixed scale
583   int cb_offset = params->cb_offset - 256;
584 
585   int cr_mult = params->cr_mult - 128;            // fixed scale
586   int cr_luma_mult = params->cr_luma_mult - 128;  // fixed scale
587   int cr_offset = params->cr_offset - 256;
588 
589   int rounding_offset = (1 << (params->scaling_shift - 1));
590 
591   int apply_y = params->num_y_points > 0 ? 1 : 0;
592   int apply_cb =
593       (params->num_cb_points > 0 || params->chroma_scaling_from_luma) ? 1 : 0;
594   int apply_cr =
595       (params->num_cr_points > 0 || params->chroma_scaling_from_luma) ? 1 : 0;
596 
597   if (params->chroma_scaling_from_luma) {
598     cb_mult = 0;        // fixed scale
599     cb_luma_mult = 64;  // fixed scale
600     cb_offset = 0;
601 
602     cr_mult = 0;        // fixed scale
603     cr_luma_mult = 64;  // fixed scale
604     cr_offset = 0;
605   }
606 
607   int min_luma, max_luma, min_chroma, max_chroma;
608 
609   if (params->clip_to_restricted_range) {
610     min_luma = min_luma_legal_range;
611     max_luma = max_luma_legal_range;
612 
613     if (mc_identity) {
614       min_chroma = min_luma_legal_range;
615       max_chroma = max_luma_legal_range;
616     } else {
617       min_chroma = min_chroma_legal_range;
618       max_chroma = max_chroma_legal_range;
619     }
620   } else {
621     min_luma = min_chroma = 0;
622     max_luma = max_chroma = 255;
623   }
624 
625   for (int i = 0; i < (half_luma_height << (1 - chroma_subsamp_y)); i++) {
626     for (int j = 0; j < (half_luma_width << (1 - chroma_subsamp_x)); j++) {
627       int average_luma = 0;
628       if (chroma_subsamp_x) {
629         average_luma = (luma[(i << chroma_subsamp_y) * luma_stride +
630                              (j << chroma_subsamp_x)] +
631                         luma[(i << chroma_subsamp_y) * luma_stride +
632                              (j << chroma_subsamp_x) + 1] +
633                         1) >>
634                        1;
635       } else {
636         average_luma = luma[(i << chroma_subsamp_y) * luma_stride + j];
637       }
638 
639       if (apply_cb) {
640         cb[i * chroma_stride + j] = clamp(
641             cb[i * chroma_stride + j] +
642                 ((scale_LUT(scaling_lut_cb,
643                             clamp(((average_luma * cb_luma_mult +
644                                     cb_mult * cb[i * chroma_stride + j]) >>
645                                    6) +
646                                       cb_offset,
647                                   0, (256 << (bit_depth - 8)) - 1),
648                             8) *
649                       cb_grain[i * chroma_grain_stride + j] +
650                   rounding_offset) >>
651                  params->scaling_shift),
652             min_chroma, max_chroma);
653       }
654 
655       if (apply_cr) {
656         cr[i * chroma_stride + j] = clamp(
657             cr[i * chroma_stride + j] +
658                 ((scale_LUT(scaling_lut_cr,
659                             clamp(((average_luma * cr_luma_mult +
660                                     cr_mult * cr[i * chroma_stride + j]) >>
661                                    6) +
662                                       cr_offset,
663                                   0, (256 << (bit_depth - 8)) - 1),
664                             8) *
665                       cr_grain[i * chroma_grain_stride + j] +
666                   rounding_offset) >>
667                  params->scaling_shift),
668             min_chroma, max_chroma);
669       }
670     }
671   }
672 
673   if (apply_y) {
674     for (int i = 0; i < (half_luma_height << 1); i++) {
675       for (int j = 0; j < (half_luma_width << 1); j++) {
676         luma[i * luma_stride + j] =
677             clamp(luma[i * luma_stride + j] +
678                       ((scale_LUT(scaling_lut_y, luma[i * luma_stride + j], 8) *
679                             luma_grain[i * luma_grain_stride + j] +
680                         rounding_offset) >>
681                        params->scaling_shift),
682                   min_luma, max_luma);
683       }
684     }
685   }
686 }
687 
add_noise_to_block_hbd(const aom_film_grain_t * params,uint16_t * luma,uint16_t * cb,uint16_t * cr,int luma_stride,int chroma_stride,int * luma_grain,int * cb_grain,int * cr_grain,int luma_grain_stride,int chroma_grain_stride,int half_luma_height,int half_luma_width,int bit_depth,int chroma_subsamp_y,int chroma_subsamp_x,int mc_identity)688 static void add_noise_to_block_hbd(
689     const aom_film_grain_t *params, uint16_t *luma, uint16_t *cb, uint16_t *cr,
690     int luma_stride, int chroma_stride, int *luma_grain, int *cb_grain,
691     int *cr_grain, int luma_grain_stride, int chroma_grain_stride,
692     int half_luma_height, int half_luma_width, int bit_depth,
693     int chroma_subsamp_y, int chroma_subsamp_x, int mc_identity) {
694   int cb_mult = params->cb_mult - 128;            // fixed scale
695   int cb_luma_mult = params->cb_luma_mult - 128;  // fixed scale
696   // offset value depends on the bit depth
697   int cb_offset = (params->cb_offset << (bit_depth - 8)) - (1 << bit_depth);
698 
699   int cr_mult = params->cr_mult - 128;            // fixed scale
700   int cr_luma_mult = params->cr_luma_mult - 128;  // fixed scale
701   // offset value depends on the bit depth
702   int cr_offset = (params->cr_offset << (bit_depth - 8)) - (1 << bit_depth);
703 
704   int rounding_offset = (1 << (params->scaling_shift - 1));
705 
706   int apply_y = params->num_y_points > 0 ? 1 : 0;
707   int apply_cb =
708       (params->num_cb_points > 0 || params->chroma_scaling_from_luma) > 0 ? 1
709                                                                           : 0;
710   int apply_cr =
711       (params->num_cr_points > 0 || params->chroma_scaling_from_luma) > 0 ? 1
712                                                                           : 0;
713 
714   if (params->chroma_scaling_from_luma) {
715     cb_mult = 0;        // fixed scale
716     cb_luma_mult = 64;  // fixed scale
717     cb_offset = 0;
718 
719     cr_mult = 0;        // fixed scale
720     cr_luma_mult = 64;  // fixed scale
721     cr_offset = 0;
722   }
723 
724   int min_luma, max_luma, min_chroma, max_chroma;
725 
726   if (params->clip_to_restricted_range) {
727     min_luma = min_luma_legal_range << (bit_depth - 8);
728     max_luma = max_luma_legal_range << (bit_depth - 8);
729 
730     if (mc_identity) {
731       min_chroma = min_luma_legal_range << (bit_depth - 8);
732       max_chroma = max_luma_legal_range << (bit_depth - 8);
733     } else {
734       min_chroma = min_chroma_legal_range << (bit_depth - 8);
735       max_chroma = max_chroma_legal_range << (bit_depth - 8);
736     }
737   } else {
738     min_luma = min_chroma = 0;
739     max_luma = max_chroma = (256 << (bit_depth - 8)) - 1;
740   }
741 
742   for (int i = 0; i < (half_luma_height << (1 - chroma_subsamp_y)); i++) {
743     for (int j = 0; j < (half_luma_width << (1 - chroma_subsamp_x)); j++) {
744       int average_luma = 0;
745       if (chroma_subsamp_x) {
746         average_luma = (luma[(i << chroma_subsamp_y) * luma_stride +
747                              (j << chroma_subsamp_x)] +
748                         luma[(i << chroma_subsamp_y) * luma_stride +
749                              (j << chroma_subsamp_x) + 1] +
750                         1) >>
751                        1;
752       } else {
753         average_luma = luma[(i << chroma_subsamp_y) * luma_stride + j];
754       }
755 
756       if (apply_cb) {
757         cb[i * chroma_stride + j] = clamp(
758             cb[i * chroma_stride + j] +
759                 ((scale_LUT(scaling_lut_cb,
760                             clamp(((average_luma * cb_luma_mult +
761                                     cb_mult * cb[i * chroma_stride + j]) >>
762                                    6) +
763                                       cb_offset,
764                                   0, (256 << (bit_depth - 8)) - 1),
765                             bit_depth) *
766                       cb_grain[i * chroma_grain_stride + j] +
767                   rounding_offset) >>
768                  params->scaling_shift),
769             min_chroma, max_chroma);
770       }
771       if (apply_cr) {
772         cr[i * chroma_stride + j] = clamp(
773             cr[i * chroma_stride + j] +
774                 ((scale_LUT(scaling_lut_cr,
775                             clamp(((average_luma * cr_luma_mult +
776                                     cr_mult * cr[i * chroma_stride + j]) >>
777                                    6) +
778                                       cr_offset,
779                                   0, (256 << (bit_depth - 8)) - 1),
780                             bit_depth) *
781                       cr_grain[i * chroma_grain_stride + j] +
782                   rounding_offset) >>
783                  params->scaling_shift),
784             min_chroma, max_chroma);
785       }
786     }
787   }
788 
789   if (apply_y) {
790     for (int i = 0; i < (half_luma_height << 1); i++) {
791       for (int j = 0; j < (half_luma_width << 1); j++) {
792         luma[i * luma_stride + j] =
793             clamp(luma[i * luma_stride + j] +
794                       ((scale_LUT(scaling_lut_y, luma[i * luma_stride + j],
795                                   bit_depth) *
796                             luma_grain[i * luma_grain_stride + j] +
797                         rounding_offset) >>
798                        params->scaling_shift),
799                   min_luma, max_luma);
800       }
801     }
802   }
803 }
804 
// Copies a width x height rectangle of samples from src to dst, row by row.
//
//   src, dst             first byte of the top-left sample in each buffer.
//   src_stride,
//   dst_stride           distance between consecutive rows, in bytes.
//   width, height        rectangle size in samples.
//   use_high_bit_depth   non-zero when each sample occupies two bytes.
//
// Rows must not overlap between src and dst (memcpy is used). A rectangle
// with a non-positive width or height copies nothing.
static void copy_rect(uint8_t *src, int src_stride, uint8_t *dst,
                      int dst_stride, int width, int height,
                      int use_high_bit_depth) {
  // A negative height would never reach zero by counting down, and a negative
  // width would turn into a huge byte count once converted to size_t.
  if (width <= 0 || height <= 0) return;

  const size_t bytes_per_sample =
      use_high_bit_depth ? sizeof(uint16_t) : sizeof(uint8_t);
  const size_t row_bytes = (size_t)width * bytes_per_sample;

  for (int row = 0; row < height; ++row) {
    memcpy(dst, src, row_bytes);
    src += src_stride;
    dst += dst_stride;
  }
}
817 
// Copies a width x height rectangle of ints from src to dst, row by row.
//
//   src, dst             top-left element in each buffer.
//   src_stride,
//   dst_stride           distance between consecutive rows, in elements.
//   width, height        rectangle size in elements.
//
// Rows must not overlap between src and dst (memcpy is used). A rectangle
// with a non-positive width or height copies nothing.
static void copy_area(int *src, int src_stride, int *dst, int dst_stride,
                      int width, int height) {
  // A negative height would never reach zero by counting down, and a negative
  // width would turn into a huge byte count once converted to size_t.
  if (width <= 0 || height <= 0) return;

  const size_t row_bytes = (size_t)width * sizeof(*src);

  for (int row = 0; row < height; ++row) {
    memcpy(dst, src, row_bytes);
    src += src_stride;
    dst += dst_stride;
  }
}
828 
// Pads a plane to even dimensions by replicating its border samples.
//
//   dst                  top-left byte of the plane.
//   dst_stride           row stride in bytes.
//   width, height        current plane size in samples.
//   use_high_bit_depth   non-zero when samples are stored as uint16_t.
//
// When width is odd, the last column is duplicated into column `width` of
// every row. When height is odd, the last row (including any column just
// added) is duplicated into row `height`. The buffer must already have room
// for the extra column and row.
static void extend_even(uint8_t *dst, int dst_stride, int width, int height,
                        int use_high_bit_depth) {
  const int pad_column = width & 1;
  const int pad_row = height & 1;
  if (!pad_column && !pad_row) return;

  // Row length to duplicate once the optional extra column is in place.
  const int padded_width = (width + 1) & (~1);

  if (!use_high_bit_depth) {
    if (pad_column) {
      for (int row = 0; row < height; ++row) {
        uint8_t *line = &dst[row * dst_stride];
        line[width] = line[width - 1];
      }
    }
    if (pad_row) {
      uint8_t *last_line = &dst[(height - 1) * dst_stride];
      memcpy(last_line + dst_stride, last_line, sizeof(*dst) * padded_width);
    }
    return;
  }

  // High bit depth: same procedure on 16-bit samples, with the stride
  // converted from bytes to samples.
  uint16_t *dst16 = (uint16_t *)dst;
  const int stride16 = dst_stride / 2;
  if (pad_column) {
    for (int row = 0; row < height; ++row) {
      uint16_t *line = &dst16[row * stride16];
      line[width] = line[width - 1];
    }
  }
  if (pad_row) {
    uint16_t *last_line = &dst16[(height - 1) * stride16];
    memcpy(last_line + stride16, last_line, sizeof(*dst16) * padded_width);
  }
}
856 
ver_boundary_overlap(int * left_block,int left_stride,int * right_block,int right_stride,int * dst_block,int dst_stride,int width,int height)857 static void ver_boundary_overlap(int *left_block, int left_stride,
858                                  int *right_block, int right_stride,
859                                  int *dst_block, int dst_stride, int width,
860                                  int height) {
861   if (width == 1) {
862     while (height) {
863       *dst_block = clamp((*left_block * 23 + *right_block * 22 + 16) >> 5,
864                          grain_min, grain_max);
865       left_block += left_stride;
866       right_block += right_stride;
867       dst_block += dst_stride;
868       --height;
869     }
870     return;
871   } else if (width == 2) {
872     while (height) {
873       dst_block[0] = clamp((27 * left_block[0] + 17 * right_block[0] + 16) >> 5,
874                            grain_min, grain_max);
875       dst_block[1] = clamp((17 * left_block[1] + 27 * right_block[1] + 16) >> 5,
876                            grain_min, grain_max);
877       left_block += left_stride;
878       right_block += right_stride;
879       dst_block += dst_stride;
880       --height;
881     }
882     return;
883   }
884 }
885 
hor_boundary_overlap(int * top_block,int top_stride,int * bottom_block,int bottom_stride,int * dst_block,int dst_stride,int width,int height)886 static void hor_boundary_overlap(int *top_block, int top_stride,
887                                  int *bottom_block, int bottom_stride,
888                                  int *dst_block, int dst_stride, int width,
889                                  int height) {
890   if (height == 1) {
891     while (width) {
892       *dst_block = clamp((*top_block * 23 + *bottom_block * 22 + 16) >> 5,
893                          grain_min, grain_max);
894       ++top_block;
895       ++bottom_block;
896       ++dst_block;
897       --width;
898     }
899     return;
900   } else if (height == 2) {
901     while (width) {
902       dst_block[0] = clamp((27 * top_block[0] + 17 * bottom_block[0] + 16) >> 5,
903                            grain_min, grain_max);
904       dst_block[dst_stride] = clamp((17 * top_block[top_stride] +
905                                      27 * bottom_block[bottom_stride] + 16) >>
906                                         5,
907                                     grain_min, grain_max);
908       ++top_block;
909       ++bottom_block;
910       ++dst_block;
911       --width;
912     }
913     return;
914   }
915 }
916 
// Copies src into dst and adds film grain to dst.
//
//   params   film grain parameters; params->bit_depth must equal
//            src->bit_depth (checked with assert).
//   src      input image; 4:2:0, 4:2:2 and 4:4:4 formats in 8-bit and 16-bit
//            storage are accepted.
//   dst      output image; its planes must already be allocated, with
//            dimensions padded to even values.
//
// Returns -1 (after printing a message to stderr) when the input format is
// not supported; otherwise returns the result of av1_add_film_grain_run().
int av1_add_film_grain(const aom_film_grain_t *params, const aom_image_t *src,
                       aom_image_t *dst) {
  // Chroma subsampling implied by the pixel format.
  int chroma_subsamp_x = 0;
  int chroma_subsamp_y = 0;
  switch (src->fmt) {
    case AOM_IMG_FMT_AOMI420:
    case AOM_IMG_FMT_I420:
    case AOM_IMG_FMT_I42016:
      chroma_subsamp_x = 1;
      chroma_subsamp_y = 1;
      break;
    case AOM_IMG_FMT_I422:
    case AOM_IMG_FMT_I42216:
      chroma_subsamp_x = 1;
      break;
    case AOM_IMG_FMT_I444:
    case AOM_IMG_FMT_I44416:
      break;
    default:  // unknown input format
      fprintf(stderr, "Film grain error: input format is not supported!");
      return -1;
  }

  // The *16 formats store every sample in two bytes.
  const int use_high_bit_depth = (src->fmt == AOM_IMG_FMT_I42016 ||
                                  src->fmt == AOM_IMG_FMT_I44416 ||
                                  src->fmt == AOM_IMG_FMT_I42216)
                                     ? 1
                                     : 0;
  const int mc_identity = (src->mc == AOM_CICP_MC_IDENTITY) ? 1 : 0;

  assert(params->bit_depth == src->bit_depth);

  // Mirror the image description onto the destination.
  dst->fmt = src->fmt;
  dst->bit_depth = src->bit_depth;
  dst->monochrome = src->monochrome;

  dst->d_w = src->d_w;
  dst->d_h = src->d_h;
  dst->r_w = src->r_w;
  dst->r_h = src->r_h;

  dst->x_chroma_shift = src->x_chroma_shift;
  dst->y_chroma_shift = src->y_chroma_shift;

  dst->cp = src->cp;
  dst->tc = src->tc;
  dst->mc = src->mc;
  dst->csp = src->csp;
  dst->range = src->range;

  dst->temporal_id = src->temporal_id;
  dst->spatial_id = src->spatial_id;

  // Grain is applied on dimensions rounded up to even.
  const int width = src->d_w + (src->d_w & 1);
  const int height = src->d_h + (src->d_h & 1);

  // Luma: copy the displayed area, then replicate the border to even size.
  // Note that dst is already assumed to be aligned to even.
  copy_rect(src->planes[AOM_PLANE_Y], src->stride[AOM_PLANE_Y],
            dst->planes[AOM_PLANE_Y], dst->stride[AOM_PLANE_Y], src->d_w,
            src->d_h, use_high_bit_depth);
  extend_even(dst->planes[AOM_PLANE_Y], dst->stride[AOM_PLANE_Y], src->d_w,
              src->d_h, use_high_bit_depth);

  // Chroma: copied at the subsampled size of the even-rounded dimensions.
  if (!src->monochrome) {
    const int chroma_width = width >> chroma_subsamp_x;
    const int chroma_height = height >> chroma_subsamp_y;

    copy_rect(src->planes[AOM_PLANE_U], src->stride[AOM_PLANE_U],
              dst->planes[AOM_PLANE_U], dst->stride[AOM_PLANE_U], chroma_width,
              chroma_height, use_high_bit_depth);

    copy_rect(src->planes[AOM_PLANE_V], src->stride[AOM_PLANE_V],
              dst->planes[AOM_PLANE_V], dst->stride[AOM_PLANE_V], chroma_width,
              chroma_height, use_high_bit_depth);
  }

  // Strides are handed on in samples, not bytes.
  const int luma_stride = dst->stride[AOM_PLANE_Y] >> use_high_bit_depth;
  const int chroma_stride = dst->stride[AOM_PLANE_U] >> use_high_bit_depth;

  return av1_add_film_grain_run(
      params, dst->planes[AOM_PLANE_Y], dst->planes[AOM_PLANE_U],
      dst->planes[AOM_PLANE_V], height, width, luma_stride, chroma_stride,
      use_high_bit_depth, chroma_subsamp_y, chroma_subsamp_x, mc_identity);
}
1022 
av1_add_film_grain_run(const aom_film_grain_t * params,uint8_t * luma,uint8_t * cb,uint8_t * cr,int height,int width,int luma_stride,int chroma_stride,int use_high_bit_depth,int chroma_subsamp_y,int chroma_subsamp_x,int mc_identity)1023 int av1_add_film_grain_run(const aom_film_grain_t *params, uint8_t *luma,
1024                            uint8_t *cb, uint8_t *cr, int height, int width,
1025                            int luma_stride, int chroma_stride,
1026                            int use_high_bit_depth, int chroma_subsamp_y,
1027                            int chroma_subsamp_x, int mc_identity) {
1028   int **pred_pos_luma;
1029   int **pred_pos_chroma;
1030   int *luma_grain_block;
1031   int *cb_grain_block;
1032   int *cr_grain_block;
1033 
1034   int *y_line_buf;
1035   int *cb_line_buf;
1036   int *cr_line_buf;
1037 
1038   int *y_col_buf;
1039   int *cb_col_buf;
1040   int *cr_col_buf;
1041 
1042   random_register = params->random_seed;
1043 
1044   int left_pad = 3;
1045   int right_pad = 3;  // padding to offset for AR coefficients
1046   int top_pad = 3;
1047   int bottom_pad = 0;
1048 
1049   int ar_padding = 3;  // maximum lag used for stabilization of AR coefficients
1050 
1051   luma_subblock_size_y = 32;
1052   luma_subblock_size_x = 32;
1053 
1054   chroma_subblock_size_y = luma_subblock_size_y >> chroma_subsamp_y;
1055   chroma_subblock_size_x = luma_subblock_size_x >> chroma_subsamp_x;
1056 
1057   // Initial padding is only needed for generation of
1058   // film grain templates (to stabilize the AR process)
1059   // Only a 64x64 luma and 32x32 chroma part of a template
1060   // is used later for adding grain, padding can be discarded
1061 
1062   int luma_block_size_y =
1063       top_pad + 2 * ar_padding + luma_subblock_size_y * 2 + bottom_pad;
1064   int luma_block_size_x = left_pad + 2 * ar_padding + luma_subblock_size_x * 2 +
1065                           2 * ar_padding + right_pad;
1066 
1067   int chroma_block_size_y = top_pad + (2 >> chroma_subsamp_y) * ar_padding +
1068                             chroma_subblock_size_y * 2 + bottom_pad;
1069   int chroma_block_size_x = left_pad + (2 >> chroma_subsamp_x) * ar_padding +
1070                             chroma_subblock_size_x * 2 +
1071                             (2 >> chroma_subsamp_x) * ar_padding + right_pad;
1072 
1073   int luma_grain_stride = luma_block_size_x;
1074   int chroma_grain_stride = chroma_block_size_x;
1075 
1076   int overlap = params->overlap_flag;
1077   int bit_depth = params->bit_depth;
1078 
1079   const int grain_center = 128 << (bit_depth - 8);
1080   grain_min = 0 - grain_center;
1081   grain_max = grain_center - 1;
1082 
1083   init_arrays(params, luma_stride, chroma_stride, &pred_pos_luma,
1084               &pred_pos_chroma, &luma_grain_block, &cb_grain_block,
1085               &cr_grain_block, &y_line_buf, &cb_line_buf, &cr_line_buf,
1086               &y_col_buf, &cb_col_buf, &cr_col_buf,
1087               luma_block_size_y * luma_block_size_x,
1088               chroma_block_size_y * chroma_block_size_x, chroma_subsamp_y,
1089               chroma_subsamp_x);
1090 
1091   if (generate_luma_grain_block(params, pred_pos_luma, luma_grain_block,
1092                                 luma_block_size_y, luma_block_size_x,
1093                                 luma_grain_stride, left_pad, top_pad, right_pad,
1094                                 bottom_pad))
1095     return -1;
1096 
1097   if (generate_chroma_grain_blocks(
1098           params,
1099           //                               pred_pos_luma,
1100           pred_pos_chroma, luma_grain_block, cb_grain_block, cr_grain_block,
1101           luma_grain_stride, chroma_block_size_y, chroma_block_size_x,
1102           chroma_grain_stride, left_pad, top_pad, right_pad, bottom_pad,
1103           chroma_subsamp_y, chroma_subsamp_x))
1104     return -1;
1105 
1106   init_scaling_function(params->scaling_points_y, params->num_y_points,
1107                         scaling_lut_y);
1108 
1109   if (params->chroma_scaling_from_luma) {
1110     memcpy(scaling_lut_cb, scaling_lut_y, sizeof(*scaling_lut_y) * 256);
1111     memcpy(scaling_lut_cr, scaling_lut_y, sizeof(*scaling_lut_y) * 256);
1112   } else {
1113     init_scaling_function(params->scaling_points_cb, params->num_cb_points,
1114                           scaling_lut_cb);
1115     init_scaling_function(params->scaling_points_cr, params->num_cr_points,
1116                           scaling_lut_cr);
1117   }
1118   for (int y = 0; y < height / 2; y += (luma_subblock_size_y >> 1)) {
1119     init_random_generator(y * 2, params->random_seed);
1120 
1121     for (int x = 0; x < width / 2; x += (luma_subblock_size_x >> 1)) {
1122       int offset_y = get_random_number(8);
1123       int offset_x = (offset_y >> 4) & 15;
1124       offset_y &= 15;
1125 
1126       int luma_offset_y = left_pad + 2 * ar_padding + (offset_y << 1);
1127       int luma_offset_x = top_pad + 2 * ar_padding + (offset_x << 1);
1128 
1129       int chroma_offset_y = top_pad + (2 >> chroma_subsamp_y) * ar_padding +
1130                             offset_y * (2 >> chroma_subsamp_y);
1131       int chroma_offset_x = left_pad + (2 >> chroma_subsamp_x) * ar_padding +
1132                             offset_x * (2 >> chroma_subsamp_x);
1133 
1134       if (overlap && x) {
1135         ver_boundary_overlap(
1136             y_col_buf, 2,
1137             luma_grain_block + luma_offset_y * luma_grain_stride +
1138                 luma_offset_x,
1139             luma_grain_stride, y_col_buf, 2, 2,
1140             AOMMIN(luma_subblock_size_y + 2, height - (y << 1)));
1141 
1142         ver_boundary_overlap(
1143             cb_col_buf, 2 >> chroma_subsamp_x,
1144             cb_grain_block + chroma_offset_y * chroma_grain_stride +
1145                 chroma_offset_x,
1146             chroma_grain_stride, cb_col_buf, 2 >> chroma_subsamp_x,
1147             2 >> chroma_subsamp_x,
1148             AOMMIN(chroma_subblock_size_y + (2 >> chroma_subsamp_y),
1149                    (height - (y << 1)) >> chroma_subsamp_y));
1150 
1151         ver_boundary_overlap(
1152             cr_col_buf, 2 >> chroma_subsamp_x,
1153             cr_grain_block + chroma_offset_y * chroma_grain_stride +
1154                 chroma_offset_x,
1155             chroma_grain_stride, cr_col_buf, 2 >> chroma_subsamp_x,
1156             2 >> chroma_subsamp_x,
1157             AOMMIN(chroma_subblock_size_y + (2 >> chroma_subsamp_y),
1158                    (height - (y << 1)) >> chroma_subsamp_y));
1159 
1160         int i = y ? 1 : 0;
1161 
1162         if (use_high_bit_depth) {
1163           add_noise_to_block_hbd(
1164               params,
1165               (uint16_t *)luma + ((y + i) << 1) * luma_stride + (x << 1),
1166               (uint16_t *)cb +
1167                   ((y + i) << (1 - chroma_subsamp_y)) * chroma_stride +
1168                   (x << (1 - chroma_subsamp_x)),
1169               (uint16_t *)cr +
1170                   ((y + i) << (1 - chroma_subsamp_y)) * chroma_stride +
1171                   (x << (1 - chroma_subsamp_x)),
1172               luma_stride, chroma_stride, y_col_buf + i * 4,
1173               cb_col_buf + i * (2 - chroma_subsamp_y) * (2 - chroma_subsamp_x),
1174               cr_col_buf + i * (2 - chroma_subsamp_y) * (2 - chroma_subsamp_x),
1175               2, (2 - chroma_subsamp_x),
1176               AOMMIN(luma_subblock_size_y >> 1, height / 2 - y) - i, 1,
1177               bit_depth, chroma_subsamp_y, chroma_subsamp_x, mc_identity);
1178         } else {
1179           add_noise_to_block(
1180               params, luma + ((y + i) << 1) * luma_stride + (x << 1),
1181               cb + ((y + i) << (1 - chroma_subsamp_y)) * chroma_stride +
1182                   (x << (1 - chroma_subsamp_x)),
1183               cr + ((y + i) << (1 - chroma_subsamp_y)) * chroma_stride +
1184                   (x << (1 - chroma_subsamp_x)),
1185               luma_stride, chroma_stride, y_col_buf + i * 4,
1186               cb_col_buf + i * (2 - chroma_subsamp_y) * (2 - chroma_subsamp_x),
1187               cr_col_buf + i * (2 - chroma_subsamp_y) * (2 - chroma_subsamp_x),
1188               2, (2 - chroma_subsamp_x),
1189               AOMMIN(luma_subblock_size_y >> 1, height / 2 - y) - i, 1,
1190               bit_depth, chroma_subsamp_y, chroma_subsamp_x, mc_identity);
1191         }
1192       }
1193 
1194       if (overlap && y) {
1195         if (x) {
1196           hor_boundary_overlap(y_line_buf + (x << 1), luma_stride, y_col_buf, 2,
1197                                y_line_buf + (x << 1), luma_stride, 2, 2);
1198 
1199           hor_boundary_overlap(cb_line_buf + x * (2 >> chroma_subsamp_x),
1200                                chroma_stride, cb_col_buf, 2 >> chroma_subsamp_x,
1201                                cb_line_buf + x * (2 >> chroma_subsamp_x),
1202                                chroma_stride, 2 >> chroma_subsamp_x,
1203                                2 >> chroma_subsamp_y);
1204 
1205           hor_boundary_overlap(cr_line_buf + x * (2 >> chroma_subsamp_x),
1206                                chroma_stride, cr_col_buf, 2 >> chroma_subsamp_x,
1207                                cr_line_buf + x * (2 >> chroma_subsamp_x),
1208                                chroma_stride, 2 >> chroma_subsamp_x,
1209                                2 >> chroma_subsamp_y);
1210         }
1211 
1212         hor_boundary_overlap(
1213             y_line_buf + ((x ? x + 1 : 0) << 1), luma_stride,
1214             luma_grain_block + luma_offset_y * luma_grain_stride +
1215                 luma_offset_x + (x ? 2 : 0),
1216             luma_grain_stride, y_line_buf + ((x ? x + 1 : 0) << 1), luma_stride,
1217             AOMMIN(luma_subblock_size_x - ((x ? 1 : 0) << 1),
1218                    width - ((x ? x + 1 : 0) << 1)),
1219             2);
1220 
1221         hor_boundary_overlap(
1222             cb_line_buf + ((x ? x + 1 : 0) << (1 - chroma_subsamp_x)),
1223             chroma_stride,
1224             cb_grain_block + chroma_offset_y * chroma_grain_stride +
1225                 chroma_offset_x + ((x ? 1 : 0) << (1 - chroma_subsamp_x)),
1226             chroma_grain_stride,
1227             cb_line_buf + ((x ? x + 1 : 0) << (1 - chroma_subsamp_x)),
1228             chroma_stride,
1229             AOMMIN(chroma_subblock_size_x -
1230                        ((x ? 1 : 0) << (1 - chroma_subsamp_x)),
1231                    (width - ((x ? x + 1 : 0) << 1)) >> chroma_subsamp_x),
1232             2 >> chroma_subsamp_y);
1233 
1234         hor_boundary_overlap(
1235             cr_line_buf + ((x ? x + 1 : 0) << (1 - chroma_subsamp_x)),
1236             chroma_stride,
1237             cr_grain_block + chroma_offset_y * chroma_grain_stride +
1238                 chroma_offset_x + ((x ? 1 : 0) << (1 - chroma_subsamp_x)),
1239             chroma_grain_stride,
1240             cr_line_buf + ((x ? x + 1 : 0) << (1 - chroma_subsamp_x)),
1241             chroma_stride,
1242             AOMMIN(chroma_subblock_size_x -
1243                        ((x ? 1 : 0) << (1 - chroma_subsamp_x)),
1244                    (width - ((x ? x + 1 : 0) << 1)) >> chroma_subsamp_x),
1245             2 >> chroma_subsamp_y);
1246 
1247         if (use_high_bit_depth) {
1248           add_noise_to_block_hbd(
1249               params, (uint16_t *)luma + (y << 1) * luma_stride + (x << 1),
1250               (uint16_t *)cb + (y << (1 - chroma_subsamp_y)) * chroma_stride +
1251                   (x << ((1 - chroma_subsamp_x))),
1252               (uint16_t *)cr + (y << (1 - chroma_subsamp_y)) * chroma_stride +
1253                   (x << ((1 - chroma_subsamp_x))),
1254               luma_stride, chroma_stride, y_line_buf + (x << 1),
1255               cb_line_buf + (x << (1 - chroma_subsamp_x)),
1256               cr_line_buf + (x << (1 - chroma_subsamp_x)), luma_stride,
1257               chroma_stride, 1,
1258               AOMMIN(luma_subblock_size_x >> 1, width / 2 - x), bit_depth,
1259               chroma_subsamp_y, chroma_subsamp_x, mc_identity);
1260         } else {
1261           add_noise_to_block(
1262               params, luma + (y << 1) * luma_stride + (x << 1),
1263               cb + (y << (1 - chroma_subsamp_y)) * chroma_stride +
1264                   (x << ((1 - chroma_subsamp_x))),
1265               cr + (y << (1 - chroma_subsamp_y)) * chroma_stride +
1266                   (x << ((1 - chroma_subsamp_x))),
1267               luma_stride, chroma_stride, y_line_buf + (x << 1),
1268               cb_line_buf + (x << (1 - chroma_subsamp_x)),
1269               cr_line_buf + (x << (1 - chroma_subsamp_x)), luma_stride,
1270               chroma_stride, 1,
1271               AOMMIN(luma_subblock_size_x >> 1, width / 2 - x), bit_depth,
1272               chroma_subsamp_y, chroma_subsamp_x, mc_identity);
1273         }
1274       }
1275 
1276       int i = overlap && y ? 1 : 0;
1277       int j = overlap && x ? 1 : 0;
1278 
1279       if (use_high_bit_depth) {
1280         add_noise_to_block_hbd(
1281             params,
1282             (uint16_t *)luma + ((y + i) << 1) * luma_stride + ((x + j) << 1),
1283             (uint16_t *)cb +
1284                 ((y + i) << (1 - chroma_subsamp_y)) * chroma_stride +
1285                 ((x + j) << (1 - chroma_subsamp_x)),
1286             (uint16_t *)cr +
1287                 ((y + i) << (1 - chroma_subsamp_y)) * chroma_stride +
1288                 ((x + j) << (1 - chroma_subsamp_x)),
1289             luma_stride, chroma_stride,
1290             luma_grain_block + (luma_offset_y + (i << 1)) * luma_grain_stride +
1291                 luma_offset_x + (j << 1),
1292             cb_grain_block +
1293                 (chroma_offset_y + (i << (1 - chroma_subsamp_y))) *
1294                     chroma_grain_stride +
1295                 chroma_offset_x + (j << (1 - chroma_subsamp_x)),
1296             cr_grain_block +
1297                 (chroma_offset_y + (i << (1 - chroma_subsamp_y))) *
1298                     chroma_grain_stride +
1299                 chroma_offset_x + (j << (1 - chroma_subsamp_x)),
1300             luma_grain_stride, chroma_grain_stride,
1301             AOMMIN(luma_subblock_size_y >> 1, height / 2 - y) - i,
1302             AOMMIN(luma_subblock_size_x >> 1, width / 2 - x) - j, bit_depth,
1303             chroma_subsamp_y, chroma_subsamp_x, mc_identity);
1304       } else {
1305         add_noise_to_block(
1306             params, luma + ((y + i) << 1) * luma_stride + ((x + j) << 1),
1307             cb + ((y + i) << (1 - chroma_subsamp_y)) * chroma_stride +
1308                 ((x + j) << (1 - chroma_subsamp_x)),
1309             cr + ((y + i) << (1 - chroma_subsamp_y)) * chroma_stride +
1310                 ((x + j) << (1 - chroma_subsamp_x)),
1311             luma_stride, chroma_stride,
1312             luma_grain_block + (luma_offset_y + (i << 1)) * luma_grain_stride +
1313                 luma_offset_x + (j << 1),
1314             cb_grain_block +
1315                 (chroma_offset_y + (i << (1 - chroma_subsamp_y))) *
1316                     chroma_grain_stride +
1317                 chroma_offset_x + (j << (1 - chroma_subsamp_x)),
1318             cr_grain_block +
1319                 (chroma_offset_y + (i << (1 - chroma_subsamp_y))) *
1320                     chroma_grain_stride +
1321                 chroma_offset_x + (j << (1 - chroma_subsamp_x)),
1322             luma_grain_stride, chroma_grain_stride,
1323             AOMMIN(luma_subblock_size_y >> 1, height / 2 - y) - i,
1324             AOMMIN(luma_subblock_size_x >> 1, width / 2 - x) - j, bit_depth,
1325             chroma_subsamp_y, chroma_subsamp_x, mc_identity);
1326       }
1327 
1328       if (overlap) {
1329         if (x) {
1330           // Copy overlapped column buffer to line buffer
1331           copy_area(y_col_buf + (luma_subblock_size_y << 1), 2,
1332                     y_line_buf + (x << 1), luma_stride, 2, 2);
1333 
1334           copy_area(
1335               cb_col_buf + (chroma_subblock_size_y << (1 - chroma_subsamp_x)),
1336               2 >> chroma_subsamp_x,
1337               cb_line_buf + (x << (1 - chroma_subsamp_x)), chroma_stride,
1338               2 >> chroma_subsamp_x, 2 >> chroma_subsamp_y);
1339 
1340           copy_area(
1341               cr_col_buf + (chroma_subblock_size_y << (1 - chroma_subsamp_x)),
1342               2 >> chroma_subsamp_x,
1343               cr_line_buf + (x << (1 - chroma_subsamp_x)), chroma_stride,
1344               2 >> chroma_subsamp_x, 2 >> chroma_subsamp_y);
1345         }
1346 
1347         // Copy grain to the line buffer for overlap with a bottom block
1348         copy_area(
1349             luma_grain_block +
1350                 (luma_offset_y + luma_subblock_size_y) * luma_grain_stride +
1351                 luma_offset_x + ((x ? 2 : 0)),
1352             luma_grain_stride, y_line_buf + ((x ? x + 1 : 0) << 1), luma_stride,
1353             AOMMIN(luma_subblock_size_x, width - (x << 1)) - (x ? 2 : 0), 2);
1354 
1355         copy_area(cb_grain_block +
1356                       (chroma_offset_y + chroma_subblock_size_y) *
1357                           chroma_grain_stride +
1358                       chroma_offset_x + (x ? 2 >> chroma_subsamp_x : 0),
1359                   chroma_grain_stride,
1360                   cb_line_buf + ((x ? x + 1 : 0) << (1 - chroma_subsamp_x)),
1361                   chroma_stride,
1362                   AOMMIN(chroma_subblock_size_x,
1363                          ((width - (x << 1)) >> chroma_subsamp_x)) -
1364                       (x ? 2 >> chroma_subsamp_x : 0),
1365                   2 >> chroma_subsamp_y);
1366 
1367         copy_area(cr_grain_block +
1368                       (chroma_offset_y + chroma_subblock_size_y) *
1369                           chroma_grain_stride +
1370                       chroma_offset_x + (x ? 2 >> chroma_subsamp_x : 0),
1371                   chroma_grain_stride,
1372                   cr_line_buf + ((x ? x + 1 : 0) << (1 - chroma_subsamp_x)),
1373                   chroma_stride,
1374                   AOMMIN(chroma_subblock_size_x,
1375                          ((width - (x << 1)) >> chroma_subsamp_x)) -
1376                       (x ? 2 >> chroma_subsamp_x : 0),
1377                   2 >> chroma_subsamp_y);
1378 
1379         // Copy grain to the column buffer for overlap with the next block to
1380         // the right
1381 
1382         copy_area(luma_grain_block + luma_offset_y * luma_grain_stride +
1383                       luma_offset_x + luma_subblock_size_x,
1384                   luma_grain_stride, y_col_buf, 2, 2,
1385                   AOMMIN(luma_subblock_size_y + 2, height - (y << 1)));
1386 
1387         copy_area(cb_grain_block + chroma_offset_y * chroma_grain_stride +
1388                       chroma_offset_x + chroma_subblock_size_x,
1389                   chroma_grain_stride, cb_col_buf, 2 >> chroma_subsamp_x,
1390                   2 >> chroma_subsamp_x,
1391                   AOMMIN(chroma_subblock_size_y + (2 >> chroma_subsamp_y),
1392                          (height - (y << 1)) >> chroma_subsamp_y));
1393 
1394         copy_area(cr_grain_block + chroma_offset_y * chroma_grain_stride +
1395                       chroma_offset_x + chroma_subblock_size_x,
1396                   chroma_grain_stride, cr_col_buf, 2 >> chroma_subsamp_x,
1397                   2 >> chroma_subsamp_x,
1398                   AOMMIN(chroma_subblock_size_y + (2 >> chroma_subsamp_y),
1399                          (height - (y << 1)) >> chroma_subsamp_y));
1400       }
1401     }
1402   }
1403 
1404   dealloc_arrays(params, &pred_pos_luma, &pred_pos_chroma, &luma_grain_block,
1405                  &cb_grain_block, &cr_grain_block, &y_line_buf, &cb_line_buf,
1406                  &cr_line_buf, &y_col_buf, &cb_col_buf, &cr_col_buf);
1407   return 0;
1408 }
1409