1 /*
2  * soft_blender_tasks_priv.cpp - soft blender tasks private class implementation
3  *
4  *  Copyright (c) 2017 Intel Corporation
5  *
6  * Licensed under the Apache License, Version 2.0 (the "License");
7  * you may not use this file except in compliance with the License.
8  * You may obtain a copy of the License at
9  *
10  *      http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing, software
13  * distributed under the License is distributed on an "AS IS" BASIS,
14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  * See the License for the specific language governing permissions and
16  * limitations under the License.
17  *
18  * Author: Wind Yuan <feng.yuan@intel.com>
19  */
20 
21 #include "soft_blender_tasks_priv.h"
22 
23 namespace XCam {
24 
25 namespace XCamSoftTasks {
26 
27 const float GaussScaleGray::coeffs[GAUSS_DOWN_SCALE_SIZE] = {0.152f, 0.222f, 0.252f, 0.222f, 0.152f};
28 
29 void
gauss_luma_2x2(UcharImage * in_luma,UcharImage * out_luma,uint32_t x,uint32_t y)30 GaussScaleGray::gauss_luma_2x2 (
31     UcharImage *in_luma, UcharImage *out_luma,
32     uint32_t x, uint32_t y)
33 {
34     /*
35     * o o o o o o o
36     * o o o o o o o
37     * o o Y(UV) o Y o o
38     * o o o o o o o
39     * o o Y o Y o o
40     * o o o o o o o
41     * o o o o o o o
42      */
43     uint32_t in_x = x * 4, in_y = y * 4;
44     float line[7];
45     float sum0[7] = {0.0f};
46     float sum1[7] = {0.0f};
47     in_luma->read_array<float, 7> (in_x - 2, in_y - 2, line);
48     multiply_coeff_y (sum0, line, coeffs[0]);
49     in_luma->read_array<float, 7> (in_x - 2, in_y - 1, line);
50     multiply_coeff_y (sum0, line, coeffs[1]);
51     in_luma->read_array<float, 7> (in_x - 2, in_y, line);
52     multiply_coeff_y (sum0, line, coeffs[2]);
53     multiply_coeff_y (sum1, line, coeffs[0]);
54     in_luma->read_array<float, 7> (in_x - 2, in_y + 1, line);
55     multiply_coeff_y (sum0, line, coeffs[3]);
56     multiply_coeff_y (sum1, line, coeffs[1]);
57     in_luma->read_array<float, 7> (in_x - 2, in_y + 2, line);
58     multiply_coeff_y (sum0, line, coeffs[4]);
59     multiply_coeff_y (sum1, line, coeffs[2]);
60     in_luma->read_array<float, 7> (in_x - 2, in_y + 3, line);
61     multiply_coeff_y (sum1, line, coeffs[3]);
62     in_luma->read_array<float, 7> (in_x - 2, in_y + 4, line);
63     multiply_coeff_y (sum1, line, coeffs[4]);
64 
65     float value[2];
66     Uchar out[2];
67     value[0] = gauss_sum (&sum0[0]);
68     value[1] = gauss_sum (&sum0[2]);
69     out[0] = convert_to_uchar (value[0]);
70     out[1] = convert_to_uchar (value[1]);
71     out_luma->write_array_no_check<2> (x * 2, y * 2, out);
72 
73     value[0] = gauss_sum (&sum1[0]);
74     value[1] = gauss_sum (&sum1[2]);
75     out[0] = convert_to_uchar(value[0]);
76     out[1] = convert_to_uchar(value[1]);
77     out_luma->write_array_no_check<2> (x * 2, y * 2 + 1, out);
78 }
79 
80 XCamReturn
work_range(const SmartPtr<Worker::Arguments> & base,const WorkRange & range)81 GaussScaleGray::work_range (const SmartPtr<Worker::Arguments> &base, const WorkRange &range)
82 {
83     SmartPtr<GaussScaleGray::Args> args = base.dynamic_cast_ptr<GaussScaleGray::Args> ();
84     XCAM_ASSERT (args.ptr ());
85     UcharImage *in_luma = args->in_luma.ptr (), *out_luma = args->out_luma.ptr ();
86     XCAM_ASSERT (in_luma && out_luma);
87 
88     for (uint32_t y = range.pos[1]; y < range.pos[1] + range.pos_len[1]; ++y)
89         for (uint32_t x = range.pos[0]; x < range.pos[0] + range.pos_len[0]; ++x)
90         {
91             gauss_luma_2x2 (in_luma, out_luma, x, y);
92         }
93     return XCAM_RETURN_NO_ERROR;
94 }
95 
96 XCamReturn
work_range(const SmartPtr<Worker::Arguments> & base,const WorkRange & range)97 GaussDownScale::work_range (const SmartPtr<Worker::Arguments> &base, const WorkRange &range)
98 {
99     SmartPtr<GaussDownScale::Args> args = base.dynamic_cast_ptr<GaussDownScale::Args> ();
100     XCAM_ASSERT (args.ptr ());
101     UcharImage *in_luma = args->in_luma.ptr (), *out_luma = args->out_luma.ptr ();
102     Uchar2Image *in_uv = args->in_uv.ptr (), *out_uv = args->out_uv.ptr ();
103     XCAM_ASSERT (in_luma && in_uv);
104     XCAM_ASSERT (out_luma && out_uv);
105 
106     for (uint32_t y = range.pos[1]; y < range.pos[1] + range.pos_len[1]; ++y)
107         for (uint32_t x = range.pos[0]; x < range.pos[0] + range.pos_len[0]; ++x)
108         {
109             gauss_luma_2x2 (in_luma, out_luma, x, y);
110 
111             // calculate UV
112             int32_t in_x = x * 2, in_y = y * 2;
113             Float2 uv_line[5];
114             Float2 uv_sum [5];
115 
116             in_uv->read_array<Float2, 5> (in_x - 2, in_y - 2, uv_line);
117             multiply_coeff_uv (uv_sum, uv_line, coeffs[0]);
118             in_uv->read_array<Float2, 5> (in_x - 2, in_y - 1, uv_line);
119             multiply_coeff_uv (uv_sum, uv_line, coeffs[1]);
120             in_uv->read_array<Float2, 5> (in_x - 2, in_y , uv_line);
121             multiply_coeff_uv (uv_sum, uv_line, coeffs[2]);
122             in_uv->read_array<Float2, 5> (in_x - 2, in_y + 1, uv_line);
123             multiply_coeff_uv (uv_sum, uv_line, coeffs[3]);
124             in_uv->read_array<Float2, 5> (in_x - 2, in_y + 2, uv_line);
125             multiply_coeff_uv (uv_sum, uv_line, coeffs[4]);
126             Float2 uv_value;
127             uv_value = gauss_sum (&uv_sum[0]);
128             Uchar2 uv_out(convert_to_uchar(uv_value.x), convert_to_uchar(uv_value.y));
129             out_uv->write_data_no_check (x, y, uv_out);
130         }
131 
132     //printf ("done\n");
133     XCAM_LOG_DEBUG ("GaussDownScale work on range:[x:%d, width:%d, y:%d, height:%d]",
134                     range.pos[0], range.pos_len[0], range.pos[1], range.pos_len[1]);
135 
136     return XCAM_RETURN_NO_ERROR;
137 }
138 
// Blends two rows of 8 luma samples with a per-sample weight:
//     out[i] = luma0[i] * mask[i] + luma1[i] * (1.0f - mask[i])
// evaluated in the cheaper form (luma0[i] - luma1[i]) * mask[i] + luma1[i].
// Implemented as a loop rather than a function-like macro so that no helper
// macro stays defined for the rest of the translation unit.
static inline void
blend_luma_8 (const float *luma0, const float *luma1, const float *mask, float *out)
{
    for (int i = 0; i < 8; ++i) {
        out[i] = (luma0[i] - luma1[i]) * mask[i] + luma1[i];
    }
}
153 
// Divides each of the 8 values in place by `max`.
static inline void
normalize_8 (float *value, const float max)
{
    for (int i = 0; i < 8; ++i) {
        value[i] /= max;
    }
}
166 
167 static inline void
read_and_blend_pixel_luma_8(const UcharImage * in0,const UcharImage * in1,const UcharImage * mask,const uint32_t in_x,const uint32_t in_y,float * out_luma,float * out_mask)168 read_and_blend_pixel_luma_8 (
169     const UcharImage *in0, const UcharImage *in1,
170     const UcharImage *mask,
171     const uint32_t in_x, const uint32_t in_y,
172     float *out_luma,
173     float *out_mask)
174 {
175     float luma0_line[8], luma1_line[8];
176     mask->read_array_no_check<float, 8> (in_x, in_y, out_mask);
177     in0->read_array_no_check<float, 8> (in_x, in_y, luma0_line);
178     in1->read_array_no_check<float, 8> (in_x, in_y, luma1_line);
179     normalize_8 (out_mask, 255.0f);
180     blend_luma_8 (luma0_line, luma1_line, out_mask, out_luma);
181 }
182 
183 static inline void
read_and_blend_uv_4(const Uchar2Image * in_a,const Uchar2Image * in_b,const float * mask,const uint32_t in_x,const uint32_t in_y,Float2 * out_uv)184 read_and_blend_uv_4 (
185     const Uchar2Image *in_a, const Uchar2Image *in_b,
186     const float *mask,
187     const uint32_t in_x, const uint32_t in_y,
188     Float2 *out_uv)
189 {
190     Float2 line_a[4], line_b[4];
191     in_a->read_array_no_check<Float2, 4> (in_x, in_y, line_a);
192     in_b->read_array_no_check<Float2, 4> (in_x, in_y, line_b);
193 
194     //out_uv[0] = line_a[0] * mask + line_b[0] * ( 1.0f - mask[0]);
195 #define BLEND_UV_4(i) out_uv[i] = (line_a[i] - line_b[i]) * mask[i] + line_b[i]
196     BLEND_UV_4 (0);
197     BLEND_UV_4 (1);
198     BLEND_UV_4 (2);
199     BLEND_UV_4 (3);
200 }
201 
202 XCamReturn
work_range(const SmartPtr<Arguments> & base,const WorkRange & range)203 BlendTask::work_range (const SmartPtr<Arguments> &base, const WorkRange &range)
204 {
205     SmartPtr<BlendTask::Args> args = base.dynamic_cast_ptr<BlendTask::Args> ();
206     XCAM_ASSERT (args.ptr ());
207     UcharImage *in0_luma = args->in_luma[0].ptr (), *in1_luma = args->in_luma[1].ptr (), *out_luma = args->out_luma.ptr ();
208     Uchar2Image *in0_uv = args->in_uv[0].ptr (), *in1_uv = args->in_uv[1].ptr (), *out_uv = args->out_uv.ptr ();
209     UcharImage *mask = args->mask.ptr ();
210 
211     XCAM_ASSERT (in0_luma && in0_uv && in1_luma && in1_uv);
212     XCAM_ASSERT (out_luma && out_uv);
213     XCAM_ASSERT (mask);
214 
215     for (uint32_t y = range.pos[1]; y < range.pos[1] + range.pos_len[1]; ++y)
216         for (uint32_t x = range.pos[0]; x < range.pos[0] + range.pos_len[0]; ++x)
217         {
218             // 8x2 -pixels each time for luma
219             uint32_t in_x = x * 8;
220             uint32_t in_y = y * 2;
221             float luma_blend[8], luma_mask[8];
222             Uchar luma_uc[8];
223 
224             // process luma (in_x, in_y)
225             read_and_blend_pixel_luma_8 (in0_luma, in1_luma, mask, in_x, in_y, luma_blend, luma_mask);
226             convert_to_uchar_N<float, 8> (luma_blend, luma_uc);
227             out_luma->write_array_no_check<8> (in_x, in_y, luma_uc);
228 
229             // process luma (in_x, in_y + 1)
230             read_and_blend_pixel_luma_8 (in0_luma, in1_luma, mask, in_x, in_y + 1, luma_blend, luma_mask);
231             convert_to_uchar_N<float, 8> (luma_blend, luma_uc);
232             out_luma->write_array_no_check<8> (in_x, in_y + 1, luma_uc);
233 
234             // process uv(4x1) (uv_x, uv_y)
235             uint32_t uv_x = x * 4, uv_y = y;
236             Float2 uv_blend[4];
237             Uchar2 uv_uc[4];
238             luma_mask[1] = luma_mask[2];
239             luma_mask[2] = luma_mask[4];
240             luma_mask[3] = luma_mask[6];
241             read_and_blend_uv_4 (in0_uv, in1_uv, luma_mask, uv_x, uv_y, uv_blend);
242             convert_to_uchar2_N<Float2, 4> (uv_blend, uv_uc);
243             out_uv->write_array_no_check<4> (uv_x, uv_y, uv_uc);
244         }
245 
246     XCAM_LOG_DEBUG ("BlendTask work on range:[x:%d, width:%d, y:%d, height:%d]",
247                     range.pos[0], range.pos_len[0], range.pos[1], range.pos_len[1]);
248 
249     return XCAM_RETURN_NO_ERROR;
250 }
251 
252 static inline void
minus_array_8(float * orig,float * gauss,Uchar * ret)253 minus_array_8 (float *orig, float *gauss, Uchar *ret)
254 {
255 #define ORG_MINUS_GAUSS(i) ret[i] = convert_to_uchar<float> ((orig[i] - gauss[i]) * 0.5f + 128.0f)
256     ORG_MINUS_GAUSS(0);
257     ORG_MINUS_GAUSS(1);
258     ORG_MINUS_GAUSS(2);
259     ORG_MINUS_GAUSS(3);
260     ORG_MINUS_GAUSS(4);
261     ORG_MINUS_GAUSS(5);
262     ORG_MINUS_GAUSS(6);
263     ORG_MINUS_GAUSS(7);
264 }
265 
266 static inline void
interpolate_luma_int_row_8x1(UcharImage * image,uint32_t fixed_x,uint32_t fixed_y,float * gauss_v,float * ret)267 interpolate_luma_int_row_8x1 (UcharImage* image, uint32_t fixed_x, uint32_t fixed_y, float *gauss_v, float* ret)
268 {
269     image->read_array<float, 5> (fixed_x, fixed_y, gauss_v);
270     ret[0] = gauss_v[0];
271     ret[1] = (gauss_v[0] + gauss_v[1]) * 0.5f;
272     ret[2] = gauss_v[1];
273     ret[3] = (gauss_v[1] + gauss_v[2]) * 0.5f;
274     ret[4] = gauss_v[2];
275     ret[5] = (gauss_v[2] + gauss_v[3]) * 0.5f;
276     ret[6] = gauss_v[3];
277     ret[7] = (gauss_v[3] + gauss_v[4]) * 0.5f;
278 }
279 
280 static inline void
interpolate_luma_half_row_8x1(UcharImage * image,uint32_t fixed_x,uint32_t next_y,float * last_gauss_v,float * ret)281 interpolate_luma_half_row_8x1 (UcharImage* image, uint32_t fixed_x, uint32_t next_y, float *last_gauss_v, float* ret)
282 {
283     float next_gauss_v[5];
284     float tmp;
285     image->read_array<float, 5> (fixed_x, next_y, next_gauss_v);
286     ret[0] = (last_gauss_v[0] + next_gauss_v[0]) / 2.0f;
287     ret[2] = (last_gauss_v[1] + next_gauss_v[1]) / 2.0f;
288     ret[4] = (last_gauss_v[2] + next_gauss_v[2]) / 2.0f;
289     ret[6] = (last_gauss_v[3] + next_gauss_v[3]) / 2.0f;
290     tmp = (last_gauss_v[4] + next_gauss_v[4]) / 2.0f;
291     ret[1] = (ret[0] + ret[2]) / 2.0f;
292     ret[3] = (ret[2] + ret[4]) / 2.0f;
293     ret[5] = (ret[4] + ret[6]) / 2.0f;
294     ret[7] = (ret[6] + tmp) / 2.0f;
295 }
296 
297 void
interplate_luma_8x2(UcharImage * orig_luma,UcharImage * gauss_luma,UcharImage * out_luma,uint32_t out_x,uint32_t out_y)298 LaplaceTask::interplate_luma_8x2 (
299     UcharImage *orig_luma, UcharImage *gauss_luma, UcharImage *out_luma,
300     uint32_t out_x, uint32_t out_y)
301 {
302     uint32_t gauss_x = out_x / 2, first_gauss_y = out_y / 2;
303     float inter_value[8];
304     float gauss_v[5];
305     float orig_v[8];
306     Uchar lap_ret[8];
307     //interplate instaed of coefficient
308     interpolate_luma_int_row_8x1 (gauss_luma, gauss_x, first_gauss_y, gauss_v, inter_value);
309     orig_luma->read_array_no_check<float, 8> (out_x, out_y, orig_v);
310     minus_array_8 (orig_v, inter_value, lap_ret);
311     out_luma->write_array_no_check<8> (out_x, out_y, lap_ret);
312 
313     uint32_t next_gauss_y = first_gauss_y + 1;
314     interpolate_luma_half_row_8x1 (gauss_luma, gauss_x, next_gauss_y, gauss_v, inter_value);
315     orig_luma->read_array_no_check<float, 8> (out_x, out_y + 1, orig_v);
316     minus_array_8 (orig_v, inter_value, lap_ret);
317     out_luma->write_array_no_check<8> (out_x, out_y + 1, lap_ret);
318 }
319 
320 static inline void
minus_array_uv_4(Float2 * orig,Float2 * gauss,Uchar2 * ret)321 minus_array_uv_4 (Float2 *orig, Float2 *gauss, Uchar2 *ret)
322 {
323 #define ORG_MINUS_GAUSS_UV(i) orig[i] -= gauss[i]; orig[i] *= 0.5f; orig[i] += 128.0f
324     ORG_MINUS_GAUSS_UV(0);
325     ORG_MINUS_GAUSS_UV(1);
326     ORG_MINUS_GAUSS_UV(2);
327     ORG_MINUS_GAUSS_UV(3);
328     convert_to_uchar2_N<Float2, 4> (orig, ret);
329 }
330 
331 static inline void
interpolate_uv_int_row_4x1(Uchar2Image * image,uint32_t x,uint32_t y,Float2 * gauss_value,Float2 * ret)332 interpolate_uv_int_row_4x1 (Uchar2Image *image, uint32_t x, uint32_t y, Float2 *gauss_value, Float2 *ret)
333 {
334     image->read_array<Float2, 3> (x, y, gauss_value);
335     ret[0] = gauss_value[0];
336     ret[1] = gauss_value[0] + gauss_value[1];
337     ret[1] *= 0.5f;
338     ret[2] = gauss_value[1];
339     ret[3] = gauss_value[1] + gauss_value[2];
340     ret[3] *= 0.5f;
341 }
342 
343 static inline void
interpolate_uv_half_row_4x1(Uchar2Image * image,uint32_t x,uint32_t y,Float2 * gauss_value,Float2 * ret)344 interpolate_uv_half_row_4x1 (Uchar2Image *image, uint32_t x, uint32_t y, Float2 *gauss_value, Float2 *ret)
345 {
346     Float2 next_gauss_uv[3];
347     image->read_array<Float2, 3> (x, y, next_gauss_uv);
348     ret[0] = (gauss_value[0] + next_gauss_uv[0]) * 0.5f;
349     ret[2] = (gauss_value[1] + next_gauss_uv[1]) * 0.5f;
350     Float2 tmp = (gauss_value[2] + next_gauss_uv[2]) * 0.5f;
351     ret[1] = (ret[0] + ret[2]) * 0.5f;
352     ret[3] = (ret[2] + tmp) * 0.5f;
353 }
354 
355 XCamReturn
work_range(const SmartPtr<Arguments> & base,const WorkRange & range)356 LaplaceTask::work_range (const SmartPtr<Arguments> &base, const WorkRange &range)
357 {
358     SmartPtr<LaplaceTask::Args> args = base.dynamic_cast_ptr<LaplaceTask::Args> ();
359     XCAM_ASSERT (args.ptr ());
360     UcharImage *orig_luma = args->orig_luma.ptr (), *gauss_luma = args->gauss_luma.ptr (), *out_luma = args->out_luma.ptr ();
361     Uchar2Image *orig_uv = args->orig_uv.ptr (), *gauss_uv = args->gauss_uv.ptr (), *out_uv = args->out_uv.ptr ();
362     XCAM_ASSERT (orig_luma && orig_uv);
363     XCAM_ASSERT (gauss_luma && gauss_uv);
364     XCAM_ASSERT (out_luma && out_uv);
365 
366     for (uint32_t y = range.pos[1]; y < range.pos[1] + range.pos_len[1]; ++y)
367         for (uint32_t x = range.pos[0]; x < range.pos[0] + range.pos_len[0]; ++x)
368         {
369             // 8x4 -pixels each time for luma
370             uint32_t out_x = x * 8, out_y = y * 4;
371             interplate_luma_8x2 (orig_luma, gauss_luma, out_luma, out_x, out_y);
372             interplate_luma_8x2 (orig_luma, gauss_luma, out_luma, out_x, out_y + 2);
373 
374             // 4x2 uv
375             uint32_t out_uv_x = x * 4, out_uv_y = y * 2;
376             uint32_t gauss_uv_x = out_uv_x / 2, gauss_uv_y = out_uv_y / 2;
377             Float2 gauss_uv_value[3];
378             Float2 orig_uv_value[4];
379             Float2 inter_uv_value[4];
380             Uchar2 lap_uv_ret[4];
381             interpolate_uv_int_row_4x1 (gauss_uv, gauss_uv_x, gauss_uv_y, gauss_uv_value, inter_uv_value);
382             orig_uv->read_array_no_check<Float2, 4> (out_uv_x , out_uv_y, orig_uv_value);
383             minus_array_uv_4 (orig_uv_value, inter_uv_value, lap_uv_ret);
384             out_uv->write_array_no_check<4> (out_uv_x , out_uv_y, lap_uv_ret);
385 
386             interpolate_uv_half_row_4x1 (gauss_uv, gauss_uv_x, gauss_uv_y + 1, gauss_uv_value, inter_uv_value);
387             orig_uv->read_array_no_check<Float2, 4> (out_uv_x , out_uv_y + 1, orig_uv_value);
388             minus_array_uv_4 (orig_uv_value, inter_uv_value, lap_uv_ret);
389             out_uv->write_array_no_check<4> (out_uv_x, out_uv_y + 1, lap_uv_ret);
390         }
391     return XCAM_RETURN_NO_ERROR;
392 }
393 
394 static inline void
reconstruct_luma_8x1(float * lap,float * up_sample,Uchar * result)395 reconstruct_luma_8x1 (float *lap, float *up_sample, Uchar *result)
396 {
397 #define RECONSTRUCT_UP_SAMPLE(i) result[i] = convert_to_uchar<float>(up_sample[i] + lap[i] * 2.0f - 256.0f)
398     RECONSTRUCT_UP_SAMPLE(0);
399     RECONSTRUCT_UP_SAMPLE(1);
400     RECONSTRUCT_UP_SAMPLE(2);
401     RECONSTRUCT_UP_SAMPLE(3);
402     RECONSTRUCT_UP_SAMPLE(4);
403     RECONSTRUCT_UP_SAMPLE(5);
404     RECONSTRUCT_UP_SAMPLE(6);
405     RECONSTRUCT_UP_SAMPLE(7);
406 }
407 
408 static inline void
reconstruct_luma_4x1(Float2 * lap,Float2 * up_sample,Uchar2 * uv_uc)409 reconstruct_luma_4x1 (Float2 *lap, Float2 *up_sample, Uchar2 *uv_uc)
410 {
411 #define RECONSTRUCT_UP_SAMPLE_UV(i) \
412     uv_uc[i].x = convert_to_uchar<float>(up_sample[i].x + lap[i].x * 2.0f - 256.0f); \
413     uv_uc[i].y = convert_to_uchar<float>(up_sample[i].y + lap[i].y * 2.0f - 256.0f)
414 
415     RECONSTRUCT_UP_SAMPLE_UV (0);
416     RECONSTRUCT_UP_SAMPLE_UV (1);
417     RECONSTRUCT_UP_SAMPLE_UV (2);
418     RECONSTRUCT_UP_SAMPLE_UV (3);
419 }
420 
421 XCamReturn
work_range(const SmartPtr<Arguments> & base,const WorkRange & range)422 ReconstructTask::work_range (const SmartPtr<Arguments> &base, const WorkRange &range)
423 {
424     SmartPtr<ReconstructTask::Args> args = base.dynamic_cast_ptr<ReconstructTask::Args> ();
425     XCAM_ASSERT (args.ptr ());
426     UcharImage *lap_luma[2] = {args->lap_luma[0].ptr (), args->lap_luma[1].ptr ()};
427     UcharImage *gauss_luma = args->gauss_luma.ptr (), *out_luma = args->out_luma.ptr ();
428     Uchar2Image *lap_uv[2] = {args->lap_uv[0].ptr (), args->lap_uv[1].ptr ()};
429     Uchar2Image *gauss_uv = args->gauss_uv.ptr (), *out_uv = args->out_uv.ptr ();
430     UcharImage *mask_image = args->mask.ptr ();
431     XCAM_ASSERT (lap_luma[0] && lap_luma[1] && lap_uv[0] && lap_uv[1]);
432     XCAM_ASSERT (gauss_luma && gauss_uv);
433     XCAM_ASSERT (out_luma && out_uv);
434     XCAM_ASSERT (mask_image);
435 
436     for (uint32_t y = range.pos[1]; y < range.pos[1] + range.pos_len[1]; ++y)
437         for (uint32_t x = range.pos[0]; x < range.pos[0] + range.pos_len[0]; ++x)
438         {
439             // 8x4 -pixels each time for luma
440             float luma_blend[8], luma_mask1[8], luma_mask2[8];
441             float luma_sample[8];
442             float gauss_data[5];
443             Uchar luma_uchar[8];
444             uint32_t in_x = x * 8, in_y = y * 4;
445 
446             // luma 1st - line
447             read_and_blend_pixel_luma_8 (lap_luma[0], lap_luma[1], mask_image, in_x, in_y, luma_blend, luma_mask1);
448             interpolate_luma_int_row_8x1 (gauss_luma, in_x / 2, in_y / 2, gauss_data, luma_sample);
449             reconstruct_luma_8x1 (luma_blend, luma_sample, luma_uchar);
450             out_luma->write_array_no_check<8> (in_x, in_y, luma_uchar);
451 
452             // luma 2nd -line
453             in_y += 1;
454             read_and_blend_pixel_luma_8 (lap_luma[0], lap_luma[1], mask_image, in_x, in_y, luma_blend, luma_mask1);
455             interpolate_luma_half_row_8x1 (gauss_luma, in_x / 2, in_y / 2 + 1, gauss_data, luma_sample);
456             reconstruct_luma_8x1 (luma_blend, luma_sample, luma_uchar);
457             out_luma->write_array_no_check<8> (in_x, in_y, luma_uchar);
458 
459             // luma 3rd -line
460             in_y += 1;
461             read_and_blend_pixel_luma_8 (lap_luma[0], lap_luma[1], mask_image, in_x, in_y, luma_blend, luma_mask2);
462             interpolate_luma_int_row_8x1 (gauss_luma, in_x / 2, in_y / 2, gauss_data, luma_sample);
463             reconstruct_luma_8x1 (luma_blend, luma_sample, luma_uchar);
464             out_luma->write_array_no_check<8> (in_x, in_y, luma_uchar);
465 
466             // luma 4th -line
467             in_y += 1;
468             read_and_blend_pixel_luma_8 (lap_luma[0], lap_luma[1], mask_image, in_x, in_y, luma_blend, luma_mask2);
469             interpolate_luma_half_row_8x1 (gauss_luma, in_x / 2, in_y / 2 + 1, gauss_data, luma_sample);
470             reconstruct_luma_8x1 (luma_blend, luma_sample, luma_uchar);
471             out_luma->write_array_no_check<8> (in_x, in_y, luma_uchar);
472 
473             // 4x2-UV process UV
474             uint32_t uv_x = x * 4, uv_y = y * 2;
475             Float2 uv_blend[4];
476             Float2 gauss_uv_value[3];
477             Float2 up_sample_uv[4];
478             Uchar2 uv_uc[4];
479             luma_mask1[1] = luma_mask1[2];
480             luma_mask1[2] = luma_mask1[4];
481             luma_mask1[3] = luma_mask1[6];
482             luma_mask2[1] = luma_mask2[2];
483             luma_mask2[2] = luma_mask2[4];
484             luma_mask2[3] = luma_mask1[6];
485 
486             //1st-line UV
487             read_and_blend_uv_4 (lap_uv[0], lap_uv[1], luma_mask1, uv_x, uv_y, uv_blend);
488             interpolate_uv_int_row_4x1 (gauss_uv, uv_x / 2, uv_y / 2, gauss_uv_value, up_sample_uv);
489             reconstruct_luma_4x1 (uv_blend, up_sample_uv, uv_uc);
490             out_uv->write_array_no_check<4> (uv_x, uv_y, uv_uc);
491 
492             //2nd-line UV
493             uv_y += 1;
494             read_and_blend_uv_4 (lap_uv[0], lap_uv[1], luma_mask2, uv_x, uv_y, uv_blend);
495             interpolate_uv_half_row_4x1 (gauss_uv, uv_x / 2, uv_y / 2 + 1, gauss_uv_value, up_sample_uv);
496             reconstruct_luma_4x1 (uv_blend, up_sample_uv, uv_uc);
497             out_uv->write_array_no_check<4> (uv_x, uv_y, uv_uc);
498         }
499     return XCAM_RETURN_NO_ERROR;
500 }
501 
502 }
503 
504 }
505